This version
Current version: 1
Date report generated: 15 December, 2020
Report prepared for: Name
Purpose of report:
Previous revisions
N/A
This report is meant to help explore DESeq2 results and was generated using RMarkdown. This section contains the code for setting up the rest of the report.
## Define paths
paths <- list()
paths$root <- params$projectdir
paths$data <- paste0(paths$root, "/data")
paths$raw <- paste0(paths$data, "/raw")
paths$output <- paste0(paths$data, "/output")
paths$processed <- paste0(paths$data, "/processed")
paths$metadata <- paste0(paths$root, "/metadata")
paths$reports <- paste0(paths$root, "/reports")
if(is.na(params$group_facet)) {
paths$DEG_output <- paste0(paths$root, "/DEG_output")
} else {
paths$DEG_output <- paste0(paths$root, "/DEG_output/group_", params$group_filter)
}
paths$RData <- paste0(paths$DEG_output, "/RData")
## knitrBoostrap and device chunk options
# Set so that long lines in R will be wrapped:
knitr::opts_chunk$set(bootstrap.show.code = FALSE,
bootstrap.panel = TRUE,
cache = FALSE)
#knitr::tidy.opts=list(width.cutoff=60), tidy=TRUE, crop = NULL # Use for PDFs
knitr::opts_knit$set(root.dir = paths$root)
# Set this variable to FALSE if you only want to run the plotting functions by reloading the last RData file.
flag=params$flag # To evaluate analysis chunks
if (flag==FALSE) { load(paste0(params$projectdir,"Partial_analysis.RData")) }
# Set this variable to TRUE if you would like to embed the files directly into the HTML for portability. This slows down page responsiveness drastically, since the files are generally quite large.
embedFiles=FALSE
# Se this variable to be TRUE if you want to have separate plots of top genes as defined in the R-ODAF template
R_ODAF_plots=FALSE
pathway_analysis <- file.path(paths$root,"Rmd/Pathway_analysis.Rmd")
message("If this text is visible by default, this report was produced to test plotting functions and should be used exclusively for testing and development.")
#### Record start time
startTime <- Sys.time()
#### Load Libraries
# General purpose
# library('conflicted')
library('knitr')
library('kableExtra')
library('tidyverse')
library('magrittr')
library('ggplot2')
library('DT')
library('data.table')
library('pheatmap')
library('lattice')
library('tidytext')
library('openxlsx')
library('RColorBrewer')
library('viridis')
library('sessioninfo')
library('plotly')
#library('RMariaDB')
# Bioinformatics-specific
library('DESeq2')
library('edgeR')
library('enrichplot')
library('rWikiPathways')
library('BiocParallel')
library('clusterProfiler')
library('biomaRt')
library('AnnotationHub')
library('vsn')
# Load genome, depends on species
# To add: Zebrafish, Folsomia candida, others?
species <- params$species
if(species=="human"){
# Human:
library('org.Hs.eg.db')
orgdb <- "org.Hs.eg.db"
species_sci <- "Homo sapiens"
ensembl_species <- "hsapiens_gene_ensembl"
species_gene_symbol <- "external_gene_name"
kegg_organism <- "hsa"
temposeq_manifest <- "191113_Human_S1500_Surrogate_2.0_Manifest.csv" # or 191004 Human Whole Transcriptome 2.0 Manifest.xlsx
} else {
if(species=="mouse"){
# Mouse:
library('org.Mm.eg.db')
orgdb <- "org.Mm.eg.db"
species_sci <- "Mus musculus"
ensembl_species <- "mmusculus_gene_ensembl"
species_gene_symbol <- "mgi_symbol"
kegg_organism <- "mmu"
temposeq_manifest <- "181130 Mouse S1500+ Surrogate 1.2 Manifest.xlsx" # or 190603 Mouse Whole Transcriptome 1.1 Manifest.xlsx
}
else {
if(species=="rat"){
# Rat:
library('org.Rn.eg.db')
orgdb <- "org.Rn.eg.db"
species_sci <- "Rattus norvegicus"
ensembl_species <- "rnorvegicus_gene_ensembl"
species_gene_symbol <- "mgi_symbol"
kegg_organism <- "rno"
temposeq_manifest <- "190809 Rat Whole Transcriptome 1.0 Manifest.xlsx"
}
else {
if(species=="hamster"){
# Golden hamster:
OrgDb.Ma <- query(AnnotationHub(), c("OrgDb","Mesocricetus auratus"))[[1]]
orgdb <- "OrgDb.Ma"
species_sci <- "Mesocricetus auratus"
ensembl_species <- "mauratus_gene_ensembl"
species_gene_symbol <- "external_gene_name"
}
else {
message("No species picked!")
}
}
}
}
# Options
options(java.parameters = "-Xmx10000m")
theme_set(theme_bw())
The code above (not shown by default) loads the relevant R packages required for analysis.
###################################################################################
###################################################################################
# PARAMETERS TO SET MANUALLY
#
# Set file locations
if(!dir.exists(paths$DEG_output)){
dir.create(paths$DEG_output, recursive = TRUE)
dir.create(paths$RData, recursive = TRUE)
}
# FILES TO LOAD
# A. Tab delimited file with merged RSEM.genes.results files:
SampleDataFile <- file.path(paths$processed,"genes.data.tsv")
# B. Tab delimited sample information file with at least 2 columns:
# 1. sample names identical to the column names of sampleData
# 2. compound/group/whatever (needs to identify to which experimental group the sample belongs)
SampleKeyFile <- file.path(paths$metadata,"metadata.txt")
# C. Tab delimited file of comparisons (contrasts) to test
# Group of interest in the left column, control for comparison in the right column
ContrastsFile <- file.path(paths$metadata,"contrasts.txt")
# Specify which groups need to be compared
contrasts <- read.delim(ContrastsFile, stringsAsFactors=FALSE, sep="\t", header=FALSE, quote="\"")
short_contrast_names <- paste(contrasts$V1,"v.",contrasts$V2) # Customize these for your experiment... Must be short enough to fit as Excel tab titles.
DESIGN <- params$design # Column name which defines the groups to be compared
intgroup <- params$intgroup # "Interesting groups" - an experimental group and/or covariates
# Various plotting and display options
nBestFeatures <- 20 # The number of best features to make plots of their counts
nBest <- 100 # Number of features to include in table and limiting PCA/clustering analysis
nHeatmap <- 50 # Number of most variable genes for heatmap
nHeatmapDEGs <- 50 # Number of DEGs for heatmap
# Set analysis ID. This ID will be used as prefix for the output files
# Normally, as follows: year - project_name - group_filter
if(is.na(params$group_filter)){
analysisID <- paste(format(Sys.time(), '%Y'), params$project_name, sep="_")
} else {
analysisID <- paste(format(Sys.time(), '%Y'), params$project_name, params$group_filter, sep="_")
}
# Specify used platform/technology for data generation:
Platform <- params$platform # Should be one of "RNA-Seq" or "TempO-Seq"
NORM_TYPE <- paste0(analysisID, "_DESeq2_", Platform)
if(Platform=="TempO-Seq"){sampledata_sep <- ","} else {sampledata_sep <- "\t"}
# Misc parameters
digits = 2 # For rounding numbers
The code above (not shown by default) specifies user preferences and data locations.
# Load input files
sampleData <- read.delim(SampleDataFile,
sep=sampledata_sep,
stringsAsFactors=FALSE,
header=TRUE,
quote="\"",
row.names=1,
check.names=FALSE)
DESeqDesign <- read.delim(SampleKeyFile,
stringsAsFactors=FALSE,
sep="\t",
header=TRUE,
quote="\"",
row.names=1) # Pick column that is used in ID; might be more appropriate to change this!
DESeqDesign$original_names <- rownames(DESeqDesign)
DESeqDesignAsRead <- DESeqDesign
# Parameter-specified exclusions
# Conditionally exclude contrasts and metadata that should be filtered
# 1. Filter by group filter - useful for selecting one treatment at a time. params$group_filter
# 2. Filter manually - useful for removing irrelevant control groups (e.g. reference RNA). params$exclude_groups
# Note that these should only be run if the respective variables are set.
if(!is.na(params$group_facet)) {
contrasts_to_filter <- DESeqDesign %>%
dplyr::filter(!!sym(params$group_facet)==params$group_filter) %>%
pull(params$design) %>%
unique()
contrasts <- contrasts %>% dplyr::filter(V1 %in% contrasts_to_filter)
DESeqDesign <- DESeqDesign %>% dplyr::filter(!!sym(params$design) %in% (unlist(contrasts) %>% unique()) )
}
if(any(!is.na(params$exclude_samples))) {
DESeqDesign <- DESeqDesign %>% dplyr::filter(!original_names %in% params$exclude_samples)
}
if(any(!is.na(params$exclude_groups))) {
DESeqDesign <- DESeqDesign %>% dplyr::filter(!(!!sym(params$design)) %in% params$exclude_groups)
contrasts_to_filter <- DESeqDesign %>%
dplyr::filter(!(!!sym(params$design)) %in% params$exclude_groups) %>%
pull(params$design) %>%
unique()
contrasts <- contrasts %>% dplyr::filter(V1 %in% contrasts_to_filter)
}
if(!is.na(params$include_only_column) & !is.na(params$include_only_group)) {
DESeqDesign <- DESeqDesign %>% dplyr::filter((!!sym(params$include_only_column)) %in% params$include_only_group)
limit_contrasts <- DESeqDesign %>% pull(!!sym(params$design)) %>% unique() %>% as.character()
contrasts <- contrasts %>% dplyr::filter(V1 %in% limit_contrasts)
}
# Create directories
plotdir <- paste(paths$DEG_output, "/plots/", sep="")
if(!dir.exists(plotdir)) {dir.create(plotdir, recursive = TRUE)}
barplot.dir <- paste(plotdir, "/barplot_genes/", sep="")
if(!dir.exists(barplot.dir)) {dir.create(barplot.dir, recursive = TRUE)}
#Set parameters according to platform
if (Platform=="RNA-Seq"){
threshold = 1000000 # Number of aligned reads per sample required
MinCount <- 1
alpha <- pAdjValue <- 0.05 # Relaxed from 0.01
linear_fc_filter = 1.5
biomart_filter="ensembl_gene_id"
} else if (Platform=="TempO-Seq") {
threshold = 100000 # Number of aligned reads per sample required
MinCount<- 0.5
alpha <- pAdjValue <- 0.05
linear_fc_filter = 1.5
biomart_filter="external_gene_name"
biospyder <- read.delim(file.path("~/shared/dbs/biospyder/", temposeq_manifest), # Assay manifest...
stringsAsFactors=FALSE,
sep="\t",
header=TRUE,
quote="\"")
biospyder_annos <- biospyder %>% dplyr::select(PROBE_NAME, ENSEMBL_GENE_ID, GENE_SYMBOL)
sampleData$PROBE_NAME <- row.names(sampleData)
sampleData <- sampleData %>%
mutate(GENE_SYMBOL = str_replace(PROBE_NAME, "_.*", "")) %>%
dplyr::group_by(GENE_SYMBOL) %>%
dplyr::summarise_if(.predicate = function(x) is.numeric(x),
.funs = c("sum")) %>%
ungroup()
sampleData <- as.data.frame(sampleData)
row.names(sampleData) <- sampleData$GENE_SYMBOL
sampleData <- sampleData[,-1]
# biospyder %>% select(ENSEMBL_GENE_ID) %>% distinct() %>% count()
# sampleData <- sampleData[row.names(sampleData) %in% biospyder$ENSEMBL_GENE_ID,] # If aligned to hg38...
} else {
print("Platform/technology not recognized")
}
The code above (not shown by default) loads user-provided sample meta data (i.e., information about your experiment, also known as colData, or, column data). This also imports the count matrix (i.e., a table of observed counts in which each sample is a column and genes are rows).
The experimental comparisons of interest to be tested in this report are as follows:
knitr::kable(contrasts,
row.names = F,
col.names = c("Group of interest", "Control for comparison"),
caption="Contrasts requested for this analysis.") %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left") %>%
scroll_box(height = "480px")
| Group of interest | Control for comparison |
|---|---|
| female_2 | female_0 |
| female_1000 | female_0 |
| female_5000 | female_0 |
##########
# DESeq2 #
##########
print(NORM_TYPE) # Name of experiment
## [1] "2020_Flame_retardants_female_DESeq2_RNA-Seq"
# First data clean-up: replace NA & remove samples with total readcount < threshold
initialSampleDataCount <- ncol(sampleData)
sampleData[ is.na(sampleData) ] <- 0
sampleData <- sampleData[,(colSums(sampleData) > threshold)] # 1 million reads required per sample
filteredSampleDataCount <- ncol(sampleData)
# Sometimes extra cleanup may be needed
# colnames(sampleData) <- gsub(pattern="^0", replacement="", x=colnames(sampleData))
samples_before <- nrow(DESeqDesign)
# Sanity check: each sample (row) in the metadata should have a corresponding column in the count data
metadata_in_sampledata <- all(rownames(DESeqDesign) %in% colnames(sampleData))
# Sanity check: each column in the count data should have a corresponding sample (row) in the metadata
sampledata_in_metadata <- all(colnames(sampleData) %in% rownames(DESeqDesign))
# Find samples that were removed because they weren't in metadata
removed <- colnames(sampleData[which(!colnames(sampleData) %in% rownames(DESeqDesign))])
# Reorder the metadata table to correspond to the order of columns in the count data
DESeqDesign <- DESeqDesign[DESeqDesign$original_names %in% colnames(sampleData),]
DESeqDesign <- na.omit(DESeqDesign)
sampleData <- sampleData[,(rownames(DESeqDesign))]
samples_after <- nrow(DESeqDesign)
head(rownames(DESeqDesign))
## [1] "SRR5890392" "SRR5890389" "SRR5890396" "SRR5890393" "SRR5890429" "SRR5890370"
head(colnames(sampleData)) # Output should match
## [1] "SRR5890392" "SRR5890389" "SRR5890396" "SRR5890393" "SRR5890429" "SRR5890370"
intgroups <- params$intgroup
# Intgroups need to be factors for DESeq2
DESeqDesign[intgroups] <- lapply(DESeqDesign[intgroups], factor)
if(!is.na(params$design)) {
# If there is a dose column, reorder the experimental group (DESIGN) by dose.
if(any(grepl(x=colnames(DESeqDesign), pattern="dose", ignore.case = T))){
doseCol <- grep(x=colnames(DESeqDesign), pattern="dose", ignore.case = T)
design_factor_reordered <- factor(DESeqDesign[[params$design]],
levels=unique(DESeqDesign[[params$design]][order(DESeqDesign[[doseCol]])]),
ordered=FALSE)
DESeqDesign[[params$design]] <- design_factor_reordered
} else {
DESeqDesign[params$design] <- factor(DESeqDesign[,params$design])
}
}
if (file.exists(file.path(paths$RData, "dds.RData")) & is.na(params$group_facet) & params$use_cached_RData == TRUE) {
print(paste("Already found DESeq2 object from previous run; loading from disk."))
load(file.path(paths$RData,"dds.RData"))
if (!identical(as.data.frame(round(counts(dds))),
round(sampleData),0)) {
print("Not identical")
}
} else {
if(file.exists(file.path(paths$RData, paste0("dds_", params$group_filter, ".RData"))) & !is.na(params$group_facet) & params$use_cached_RData == TRUE) {
load(file=file.path(paths$RData, paste0("dds_", params$group_filter, ".RData")))
} else {
dds <- DESeqDataSetFromMatrix(countData = round(sampleData),
colData = as.data.frame(DESeqDesign),
design = formula(paste("~", DESIGN)) ) # replace with intgroups if you have a good reason to
dds <- dds[,rownames(DESeqDesign)]
dds <- dds[rowSums(counts(dds)) > 1]
dds <- DESeq(dds, parallel = TRUE, BPPARAM=MulticoreParam(params$cpus))
if(is.na(params$group_facet)) {
save(dds, file=file.path(paths$RData, "dds.RData"))
} else {
save(dds, file=file.path(paths$RData, paste0("dds_", params$group_filter, ".RData")))
}
}
}
# ### REMOVE if done above from scratch
# keep <- grep(colData(dds)$chemical, pattern="cells", invert=T, ignore.case = T)
# dds <- dds[,keep]
# dds <- dds[row.names(dds) %in% biospyder$ENSEMBL_GENE_ID,]
# Another sanity check to make sure the object looks correct
resultsNames(dds)
## [1] "Intercept" "group_female_2_vs_female_0" "group_female_1000_vs_female_0"
## [4] "group_female_5000_vs_female_0"
head(colData(dds))
## DataFrame with 6 rows and 8 columns
## name sex dose files group original_names sizeFactor replaceable
## <character> <character> <factor> <character> <factor> <character> <numeric> <logical>
## SRR5890392 female_0_bc41 female 0 bc41 female_0 SRR5890392 1.861961 TRUE
## SRR5890389 female_0_bc42 female 0 bc42 female_0 SRR5890389 1.293027 TRUE
## SRR5890396 female_0_bc43 female 0 bc43 female_0 SRR5890396 0.982415 TRUE
## SRR5890393 female_0_bc44 female 0 bc44 female_0 SRR5890393 0.891709 TRUE
## SRR5890429 female_0_bc45 female 0 bc45 female_0 SRR5890429 0.815912 TRUE
## SRR5890370 female_0_bc46 female 0 bc46 female_0 SRR5890370 0.976230 TRUE
head(assay(dds))
## SRR5890392 SRR5890389 SRR5890396 SRR5890393 SRR5890429 SRR5890370 SRR5890387 SRR5890381 SRR5890371
## ENSRNOG00000000001 0 0 0 0 0 0 0 0 0
## ENSRNOG00000000007 2 1 0 0 0 0 0 0 1
## ENSRNOG00000000010 0 0 0 0 0 0 0 0 0
## ENSRNOG00000000012 0 0 0 0 1 0 0 0 0
## ENSRNOG00000000017 1 0 0 1 1 0 2 0 3
## ENSRNOG00000000021 31 24 14 25 20 10 39 11 14
## SRR5890391 SRR5890421 SRR5890422 SRR5890383 SRR5890423 SRR5890363 SRR5890385 SRR5890415 SRR5890416
## ENSRNOG00000000001 0 0 0 0 0 2 0 0 0
## ENSRNOG00000000007 0 0 1 0 0 0 0 0 0
## ENSRNOG00000000010 0 0 0 0 0 0 0 0 0
## ENSRNOG00000000012 0 0 1 0 0 0 0 0 0
## ENSRNOG00000000017 0 0 1 0 0 0 0 3 0
## ENSRNOG00000000021 12 19 11 12 15 12 28 12 20
## SRR5890395 SRR5890434 SRR5890388 SRR5890431 SRR5890386 SRR5890440 SRR5890384 SRR5890405 SRR5890382
## ENSRNOG00000000001 0 0 0 0 0 0 1 0 0
## ENSRNOG00000000007 0 1 0 0 0 0 0 0 0
## ENSRNOG00000000010 0 0 0 0 0 0 0 0 0
## ENSRNOG00000000012 0 0 0 1 0 0 0 1 0
## ENSRNOG00000000017 0 0 1 1 0 0 2 1 1
## ENSRNOG00000000021 15 11 10 24 20 22 13 14 31
## SRR5890362 SRR5890394 SRR5890390 SRR5890372 SRR5890424 SRR5890425 SRR5890366 SRR5890428 SRR5890406
## ENSRNOG00000000001 0 0 0 0 0 0 1 0 0
## ENSRNOG00000000007 0 2 1 0 1 0 0 2 0
## ENSRNOG00000000010 0 0 0 0 0 0 0 0 0
## ENSRNOG00000000012 0 0 0 0 0 1 0 0 0
## ENSRNOG00000000017 0 0 0 0 2 0 0 0 1
## ENSRNOG00000000021 10 3 26 10 20 17 46 9 27
## SRR5890430 SRR5890437 SRR5890432
## ENSRNOG00000000001 0 2 0
## ENSRNOG00000000007 2 1 0
## ENSRNOG00000000010 0 2 0
## ENSRNOG00000000012 0 0 0
## ENSRNOG00000000017 0 1 2
## ENSRNOG00000000021 20 7 14
head(rowRanges(dds))
## GRangesList object of length 6:
## $ENSRNOG00000000001
## GRanges object with 0 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## -------
## seqinfo: no sequences
##
## $ENSRNOG00000000007
## GRanges object with 0 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## -------
## seqinfo: no sequences
##
## $ENSRNOG00000000010
## GRanges object with 0 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## -------
## seqinfo: no sequences
##
## ...
## <3 more elements>
str(counts(dds))
## int [1:18509, 1:39] 0 2 0 0 1 31 1550 25 143 36 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:18509] "ENSRNOG00000000001" "ENSRNOG00000000007" "ENSRNOG00000000010" "ENSRNOG00000000012" ...
## ..$ : chr [1:39] "SRR5890392" "SRR5890389" "SRR5890396" "SRR5890393" ...
# Make regularized log object for later plotting
#rld <- tryCatch(rlog(dds), error = function(e) { rlog(dds, fitType = 'mean') })
# Use vst for hundreds of samples!
rld <- vst(dds) # Should this be blind?
The code above (not shown by default) uses DESeq2 1.30.0 to test for deferentially abundant genes within the RNA-Seq data.
Prior to running DESeq2, the data was filtered to remove samples that do not have \(10^{6}\) reads per sample.
The user-provided metadata initially included 40 samples.
The count matrix initially included 80 samples (including any reference material samples). After removing samples with less than \(10^{6}\) reads, 76 samples were left. It is FALSE that all the samples provided in the metadata table were also identified in the count matrix. It is FALSE that all the samples in the count matrix were also identified in the metadata table.
Following the removal of other samples (i.e., reference RNA, etc.), there were 39 samples remaining in the experiment. The samples excluded from analysis are shown in the table in the section titled “Sample data”, along with complete sample metadata for the experiment.
This table shows the final list of samples that were used in the data analysis (as well as the corresponding sample information, e.g., to which experimental group samples belong).
# Conditionally sort by dose if that column name is present
if(any(grepl(x=colnames(DESeqDesign), pattern="dose", ignore.case = T))){
doseCol <- grep(x=colnames(DESeqDesign), pattern="dose", ignore.case = T)
knitr::kable(DESeqDesign %>% group_by(!!sym(params$design)) %>% arrange(doseCol, .by_group = T),
row.names = F,
caption="Samples and corresponding experimental conditions used in this report") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
scroll_box(width = "100%", height = "480px")
} else {
knitr::kable(DESeqDesign %>% arrange(!!sym(params$design)),
row.names = F,
caption="Samples and corresponding experimental conditions used in this report") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
scroll_box(width = "100%", height = "480px")
}
| name | sex | dose | files | group | original_names |
|---|---|---|---|---|---|
| female_0_bc41 | female | 0 | bc41 | female_0 | SRR5890392 |
| female_0_bc42 | female | 0 | bc42 | female_0 | SRR5890389 |
| female_0_bc43 | female | 0 | bc43 | female_0 | SRR5890396 |
| female_0_bc44 | female | 0 | bc44 | female_0 | SRR5890393 |
| female_0_bc45 | female | 0 | bc45 | female_0 | SRR5890429 |
| female_0_bc46 | female | 0 | bc46 | female_0 | SRR5890370 |
| female_0_bc47 | female | 0 | bc47 | female_0 | SRR5890387 |
| female_0_bc48 | female | 0 | bc48 | female_0 | SRR5890381 |
| female_0_bc49 | female | 0 | bc49 | female_0 | SRR5890371 |
| female_0_bc50 | female | 0 | bc50 | female_0 | SRR5890391 |
| female_2_bc51 | female | 2 | bc51 | female_2 | SRR5890421 |
| female_2_bc52 | female | 2 | bc52 | female_2 | SRR5890422 |
| female_2_bc53 | female | 2 | bc53 | female_2 | SRR5890383 |
| female_2_bc54 | female | 2 | bc54 | female_2 | SRR5890423 |
| female_2_bc55 | female | 2 | bc55 | female_2 | SRR5890363 |
| female_2_bc56 | female | 2 | bc56 | female_2 | SRR5890385 |
| female_2_bc57 | female | 2 | bc57 | female_2 | SRR5890415 |
| female_2_bc58 | female | 2 | bc58 | female_2 | SRR5890416 |
| female_2_bc59 | female | 2 | bc59 | female_2 | SRR5890395 |
| female_2_bc60 | female | 2 | bc60 | female_2 | SRR5890434 |
| female_1000_bc61 | female | 1000 | bc61 | female_1000 | SRR5890388 |
| female_1000_bc62 | female | 1000 | bc62 | female_1000 | SRR5890431 |
| female_1000_bc63 | female | 1000 | bc63 | female_1000 | SRR5890386 |
| female_1000_bc64 | female | 1000 | bc64 | female_1000 | SRR5890440 |
| female_1000_bc65 | female | 1000 | bc65 | female_1000 | SRR5890384 |
| female_1000_bc66 | female | 1000 | bc66 | female_1000 | SRR5890405 |
| female_1000_bc67 | female | 1000 | bc67 | female_1000 | SRR5890382 |
| female_1000_bc68 | female | 1000 | bc68 | female_1000 | SRR5890362 |
| female_1000_bc69 | female | 1000 | bc69 | female_1000 | SRR5890394 |
| female_1000_bc70 | female | 1000 | bc70 | female_1000 | SRR5890390 |
| female_5000_bc71 | female | 5000 | bc71 | female_5000 | SRR5890372 |
| female_5000_bc72 | female | 5000 | bc72 | female_5000 | SRR5890424 |
| female_5000_bc73 | female | 5000 | bc73 | female_5000 | SRR5890425 |
| female_5000_bc75 | female | 5000 | bc75 | female_5000 | SRR5890366 |
| female_5000_bc76 | female | 5000 | bc76 | female_5000 | SRR5890428 |
| female_5000_bc77 | female | 5000 | bc77 | female_5000 | SRR5890406 |
| female_5000_bc78 | female | 5000 | bc78 | female_5000 | SRR5890430 |
| female_5000_bc79 | female | 5000 | bc79 | female_5000 | SRR5890437 |
| female_5000_bc80 | female | 5000 | bc80 | female_5000 | SRR5890432 |
knitr::kable(DESeqDesign %>% group_by(!!sym(params$design)) %>% tally(),
row.names = F,
col.names = c("Experimental group", "Number of samples in group"),
caption="Number of samples in each experimental group used in this report") %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left") %>%
scroll_box(width = "100%", height = "480px")
| Experimental group | Number of samples in group |
|---|---|
| female_0 | 10 |
| female_2 | 10 |
| female_1000 | 10 |
| female_5000 | 9 |
This table shows the samples that were removed from this analysis.
# Conditionally sort by dose if that column name is present?
knitr::kable(DESeqDesignAsRead %>% arrange(!!sym(params$design)) %>% dplyr::filter(!original_names %in% DESeqDesign$original_names),
row.names = F,
caption="Samples removed from analysis") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
scroll_box(width = "100%", height = "480px")
| name | sex | dose | files | group | original_names |
|---|---|---|---|---|---|
| female_5000_bc74 | female | 5000 | bc74 | female_5000 | SRR5890426 |
| male_0_bc1 | male | 0 | bc01 | male_0 | SRR5890399 |
| male_0_bc2 | male | 0 | bc02 | male_0 | SRR5890400 |
| male_0_bc3 | male | 0 | bc03 | male_0 | SRR5890397 |
| male_0_bc4 | male | 0 | bc04 | male_0 | SRR5890398 |
| male_0_bc5 | male | 0 | bc05 | male_0 | SRR5890403 |
| male_0_bc6 | male | 0 | bc06 | male_0 | SRR5890404 |
| male_0_bc7 | male | 0 | bc07 | male_0 | SRR5890401 |
| male_0_bc8 | male | 0 | bc08 | male_0 | SRR5890402 |
| male_0_bc9 | male | 0 | bc09 | male_0 | SRR5890436 |
| male_0_bc10 | male | 0 | bc10 | male_0 | SRR5890367 |
| male_1000_bc21 | male | 1000 | bc21 | male_1000 | SRR5890376 |
| male_1000_bc22 | male | 1000 | bc22 | male_1000 | SRR5890375 |
| male_1000_bc23 | male | 1000 | bc23 | male_1000 | SRR5890374 |
| male_1000_bc24 | male | 1000 | bc24 | male_1000 | SRR5890373 |
| male_1000_bc25 | male | 1000 | bc25 | male_1000 | SRR5890380 |
| male_1000_bc26 | male | 1000 | bc26 | male_1000 | SRR5890379 |
| male_1000_bc27 | male | 1000 | bc27 | male_1000 | SRR5890378 |
| male_1000_bc28 | male | 1000 | bc28 | male_1000 | SRR5890377 |
| male_1000_bc29 | male | 1000 | bc29 | male_1000 | SRR5890369 |
| male_1000_bc30 | male | 1000 | bc30 | male_1000 | SRR5890368 |
| male_2_bc11 | male | 2 | bc11 | male_2 | SRR5890409 |
| male_2_bc12 | male | 2 | bc12 | male_2 | SRR5890410 |
| male_2_bc13 | male | 2 | bc13 | male_2 | SRR5890407 |
| male_2_bc14 | male | 2 | bc14 | male_2 | SRR5890408 |
| male_2_bc15 | male | 2 | bc15 | male_2 | SRR5890413 |
| male_2_bc16 | male | 2 | bc16 | male_2 | SRR5890414 |
| male_2_bc17 | male | 2 | bc17 | male_2 | SRR5890411 |
| male_2_bc18 | male | 2 | bc18 | male_2 | SRR5890412 |
| male_2_bc19 | male | 2 | bc19 | male_2 | SRR5890417 |
| male_2_bc20 | male | 2 | bc20 | male_2 | SRR5890418 |
| male_5000_bc31 | male | 5000 | bc31 | male_5000 | SRR5890439 |
| male_5000_bc32 | male | 5000 | bc32 | male_5000 | SRR5890361 |
| male_5000_bc33 | male | 5000 | bc33 | male_5000 | SRR5890364 |
| male_5000_bc34 | male | 5000 | bc34 | male_5000 | SRR5890365 |
| male_5000_bc35 | male | 5000 | bc35 | male_5000 | SRR5890427 |
| male_5000_bc36 | male | 5000 | bc36 | male_5000 | SRR5890433 |
| male_5000_bc37 | male | 5000 | bc37 | male_5000 | SRR5890435 |
| male_5000_bc38 | male | 5000 | bc38 | male_5000 | SRR5890438 |
| male_5000_bc39 | male | 5000 | bc39 | male_5000 | SRR5890419 |
| male_5000_bc40 | male | 5000 | bc40 | male_5000 | SRR5890420 |
knitr::kable(DESeqDesignAsRead %>%
dplyr::filter(original_names %in% removed) %>%
group_by(!!sym(params$design)) %>% tally(),
row.names = F,
col.names = c("Experimental group", "Number of samples removed"),
caption="Number of samples removed from each experimental group") %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left") %>%
scroll_box(width = "100%", height = "480px")
| Experimental group | Number of samples removed |
|---|---|
| male_0 | 10 |
| male_1000 | 10 |
| male_2 | 10 |
| male_5000 | 7 |
This table shows the original sample metadata including any samples that were removed within this report.
# Conditionally sort by dose if that column name is present?
knitr::kable(DESeqDesignAsRead %>% arrange(!!sym(params$design)),
row.names = F,
caption="Complete sample metadata as provided to the Genomics Laboratory.") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
scroll_box(width = "100%", height = "480px")
| name | sex | dose | files | group | original_names |
|---|---|---|---|---|---|
| female_0_bc41 | female | 0 | bc41 | female_0 | SRR5890392 |
| female_0_bc42 | female | 0 | bc42 | female_0 | SRR5890389 |
| female_0_bc43 | female | 0 | bc43 | female_0 | SRR5890396 |
| female_0_bc44 | female | 0 | bc44 | female_0 | SRR5890393 |
| female_0_bc45 | female | 0 | bc45 | female_0 | SRR5890429 |
| female_0_bc46 | female | 0 | bc46 | female_0 | SRR5890370 |
| female_0_bc47 | female | 0 | bc47 | female_0 | SRR5890387 |
| female_0_bc48 | female | 0 | bc48 | female_0 | SRR5890381 |
| female_0_bc49 | female | 0 | bc49 | female_0 | SRR5890371 |
| female_0_bc50 | female | 0 | bc50 | female_0 | SRR5890391 |
| female_1000_bc61 | female | 1000 | bc61 | female_1000 | SRR5890388 |
| female_1000_bc62 | female | 1000 | bc62 | female_1000 | SRR5890431 |
| female_1000_bc63 | female | 1000 | bc63 | female_1000 | SRR5890386 |
| female_1000_bc64 | female | 1000 | bc64 | female_1000 | SRR5890440 |
| female_1000_bc65 | female | 1000 | bc65 | female_1000 | SRR5890384 |
| female_1000_bc66 | female | 1000 | bc66 | female_1000 | SRR5890405 |
| female_1000_bc67 | female | 1000 | bc67 | female_1000 | SRR5890382 |
| female_1000_bc68 | female | 1000 | bc68 | female_1000 | SRR5890362 |
| female_1000_bc69 | female | 1000 | bc69 | female_1000 | SRR5890394 |
| female_1000_bc70 | female | 1000 | bc70 | female_1000 | SRR5890390 |
| female_2_bc51 | female | 2 | bc51 | female_2 | SRR5890421 |
| female_2_bc52 | female | 2 | bc52 | female_2 | SRR5890422 |
| female_2_bc53 | female | 2 | bc53 | female_2 | SRR5890383 |
| female_2_bc54 | female | 2 | bc54 | female_2 | SRR5890423 |
| female_2_bc55 | female | 2 | bc55 | female_2 | SRR5890363 |
| female_2_bc56 | female | 2 | bc56 | female_2 | SRR5890385 |
| female_2_bc57 | female | 2 | bc57 | female_2 | SRR5890415 |
| female_2_bc58 | female | 2 | bc58 | female_2 | SRR5890416 |
| female_2_bc59 | female | 2 | bc59 | female_2 | SRR5890395 |
| female_2_bc60 | female | 2 | bc60 | female_2 | SRR5890434 |
| female_5000_bc71 | female | 5000 | bc71 | female_5000 | SRR5890372 |
| female_5000_bc72 | female | 5000 | bc72 | female_5000 | SRR5890424 |
| female_5000_bc73 | female | 5000 | bc73 | female_5000 | SRR5890425 |
| female_5000_bc74 | female | 5000 | bc74 | female_5000 | SRR5890426 |
| female_5000_bc75 | female | 5000 | bc75 | female_5000 | SRR5890366 |
| female_5000_bc76 | female | 5000 | bc76 | female_5000 | SRR5890428 |
| female_5000_bc77 | female | 5000 | bc77 | female_5000 | SRR5890406 |
| female_5000_bc78 | female | 5000 | bc78 | female_5000 | SRR5890430 |
| female_5000_bc79 | female | 5000 | bc79 | female_5000 | SRR5890437 |
| female_5000_bc80 | female | 5000 | bc80 | female_5000 | SRR5890432 |
| male_0_bc1 | male | 0 | bc01 | male_0 | SRR5890399 |
| male_0_bc2 | male | 0 | bc02 | male_0 | SRR5890400 |
| male_0_bc3 | male | 0 | bc03 | male_0 | SRR5890397 |
| male_0_bc4 | male | 0 | bc04 | male_0 | SRR5890398 |
| male_0_bc5 | male | 0 | bc05 | male_0 | SRR5890403 |
| male_0_bc6 | male | 0 | bc06 | male_0 | SRR5890404 |
| male_0_bc7 | male | 0 | bc07 | male_0 | SRR5890401 |
| male_0_bc8 | male | 0 | bc08 | male_0 | SRR5890402 |
| male_0_bc9 | male | 0 | bc09 | male_0 | SRR5890436 |
| male_0_bc10 | male | 0 | bc10 | male_0 | SRR5890367 |
| male_1000_bc21 | male | 1000 | bc21 | male_1000 | SRR5890376 |
| male_1000_bc22 | male | 1000 | bc22 | male_1000 | SRR5890375 |
| male_1000_bc23 | male | 1000 | bc23 | male_1000 | SRR5890374 |
| male_1000_bc24 | male | 1000 | bc24 | male_1000 | SRR5890373 |
| male_1000_bc25 | male | 1000 | bc25 | male_1000 | SRR5890380 |
| male_1000_bc26 | male | 1000 | bc26 | male_1000 | SRR5890379 |
| male_1000_bc27 | male | 1000 | bc27 | male_1000 | SRR5890378 |
| male_1000_bc28 | male | 1000 | bc28 | male_1000 | SRR5890377 |
| male_1000_bc29 | male | 1000 | bc29 | male_1000 | SRR5890369 |
| male_1000_bc30 | male | 1000 | bc30 | male_1000 | SRR5890368 |
| male_2_bc11 | male | 2 | bc11 | male_2 | SRR5890409 |
| male_2_bc12 | male | 2 | bc12 | male_2 | SRR5890410 |
| male_2_bc13 | male | 2 | bc13 | male_2 | SRR5890407 |
| male_2_bc14 | male | 2 | bc14 | male_2 | SRR5890408 |
| male_2_bc15 | male | 2 | bc15 | male_2 | SRR5890413 |
| male_2_bc16 | male | 2 | bc16 | male_2 | SRR5890414 |
| male_2_bc17 | male | 2 | bc17 | male_2 | SRR5890411 |
| male_2_bc18 | male | 2 | bc18 | male_2 | SRR5890412 |
| male_2_bc19 | male | 2 | bc19 | male_2 | SRR5890417 |
| male_2_bc20 | male | 2 | bc20 | male_2 | SRR5890418 |
| male_5000_bc31 | male | 5000 | bc31 | male_5000 | SRR5890439 |
| male_5000_bc32 | male | 5000 | bc32 | male_5000 | SRR5890361 |
| male_5000_bc33 | male | 5000 | bc33 | male_5000 | SRR5890364 |
| male_5000_bc34 | male | 5000 | bc34 | male_5000 | SRR5890365 |
| male_5000_bc35 | male | 5000 | bc35 | male_5000 | SRR5890427 |
| male_5000_bc36 | male | 5000 | bc36 | male_5000 | SRR5890433 |
| male_5000_bc37 | male | 5000 | bc37 | male_5000 | SRR5890435 |
| male_5000_bc38 | male | 5000 | bc38 | male_5000 | SRR5890438 |
| male_5000_bc39 | male | 5000 | bc39 | male_5000 | SRR5890419 |
| male_5000_bc40 | male | 5000 | bc40 | male_5000 | SRR5890420 |
knitr::kable(DESeqDesignAsRead %>% group_by(!!sym(params$design)) %>% tally(),
row.names = F,
col.names = c("Experimental group", "Number of samples as provided"),
caption="Number of samples and experimental groups as provided") %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "left") %>%
scroll_box(width = "100%", height = "480px")
| Experimental group | Number of samples as provided |
|---|---|
| female_0 | 10 |
| female_1000 | 10 |
| female_2 | 10 |
| female_5000 | 10 |
| male_0 | 10 |
| male_1000 | 10 |
| male_2 | 10 |
| male_5000 | 10 |
###################################################################################
#DEFINE FUNCTIONS
###################################################################################
plot.barplots<-function(samples,b) {
color <- NULL
for (h in 1:ncol(norm_data)){
if (substring(colnames(norm_data)[h], 1, 3) == substring(condition2, 1, 3)) { color <- c(color, "red3") } else { color <- c(color, "darkgrey")}
}
fileNamePlot <- paste0(b, row.names(samples)[b], ".png")
pseudoTitle <- paste0(row.names(samples)[b], "_pAdj:", samples[b,"padj"])
png(file=paste(fileNamePlot, sep="/"), width=1200, height=700, pointsize=20)
par(mar=c(8,4,3,1))
barplot(as.numeric(norm_data[row.names(samples)[b],]), las=2, col=color, main=pseudoTitle, cex.names=0.5, cex.axis=0.8, names.arg=colnames(norm_data))
dev.off()
} #plot.barplots function done
###################################################################################
draw.barplots<-function(samples, top_bottom, NUM){
if (nrow(samples) == 0) {
#print("no genes to plot")
} else {
if (top_bottom == "top") {
#print(paste0("drawing Top ", NUM, " plots"))
if (nrow(samples) <= NUM) {
for (b in 1:nrow(samples)) {plot.barplots(samples,b)}
}
if (nrow(samples) > NUM) {
for (b in 1:NUM) {plot.barplots(samples,b)}
}
}
if (top_bottom == "bottom") {
#print(paste0("drawing Bottom", NUM, " plots"))
if (nrow(samples) <= NUM) {
for (b in 1:nrow(samples)) {plot.barplots(samples,b)}
}
if (nrow(samples) > NUM) {
for (b in ((nrow(samples)-NUM+1):nrow(samples))) {plot.barplots(DEsamples,b)}
}
}}
} #draw.barplots function done
###################################################################################
###################################################################################
The code above (not shown by default) loads in plotting functions specific to the Omics Data Analysis Frameworks for Regulatory application (R-ODAF) template. More information on the R-ODAF framework is available here.
# Initial setup for DESeq2 contrasts
cooks <- FALSE # Normally set Cook's cutoff to false
resList <- list()
Counts <- counts(dds, normalized=TRUE)
CPMdds <- cpm(counts(dds, normalized=TRUE))
for (x in 1:nrow(contrasts)) { ## for all comparisons to be done
condition1 <- contrasts[x,2]
condition2 <- contrasts[x,1]
print(paste(condition2, " vs ", condition1, ":", NORM_TYPE))
DE_Design <- matrix(data=NA, ncol=2)
DE_Design <- as.matrix(DESeqDesign[c(grep(condition1,DESeqDesign[,DESIGN]), grep(condition2,DESeqDesign[,DESIGN])),])
samples <- sampleData[, rownames(DE_Design)]
colnames(samples) <- NULL
###########
print(paste0("Filtering genes: 75% of at least 1 group need to be above ", MinCount, " CPM"))
print("AND")
print("Detecting spurious spikes: Max-Median > Sum/(Rep+1)" )
SampPerGroup <- table(DE_Design[,DESIGN])
idx <- FlagSpike <- NameRows <- NULL
for (gene in 1:nrow(dds)) { # Loop over each gene
GroupsPass <- checkSpike <- NULL
for (group in 1:length(SampPerGroup)) { # Test if group passes
sampleCols <- grep(dimnames(SampPerGroup)[[1]][group], DE_Design[,DESIGN])
Check <- sum(CPMdds[gene, sampleCols] >= MinCount) >= 0.75*SampPerGroup[group]
GroupsPass <- c(GroupsPass, Check)
if (Check == FALSE) {checkSpike <- c(checkSpike, Check)} else {
checkSpike <- c(checkSpike, ((max(Counts[gene, sampleCols]) - median(Counts[gene,sampleCols])) >=
(sum(Counts[gene, sampleCols]) / (SampPerGroup[group]+1))))
}
}
idx <- c(idx, as.logical(sum(GroupsPass)))
if (sum(checkSpike) >=1) {
FlagSpike <- rbind(FlagSpike, Counts[gene,])
NameRows <<- c(NameRows, row.names(Counts)[gene])
row.names(FlagSpike) <- NameRows
}
}
print("Obtaining the DESeq2 results")
currentContrast <- c(DESIGN, condition2, condition1)
res <- results(dds[idx],
parallel = TRUE, BPPARAM=MulticoreParam(39),
contrast=currentContrast,
pAdjustMethod= 'fdr',
cooksCutoff=cooks) # Cooks cutoff disabled - manually inspect.
res <- lfcShrink(dds=dds[idx],
contrast=currentContrast,
res=res,
type="ashr")
#Make new directory for the ODAF-specific files
ODAFdir <- file.path(paths$DEG_output, "R-ODAF")
if(!dir.exists(ODAFdir)) {dir.create(ODAFdir, recursive = TRUE)}
setwd(ODAFdir)
FileName <- paste(NORM_TYPE, condition2,"vs",condition1, "FDR", pAdjValue, sep="_")
#Save output tables
norm_data <<- counts(dds[idx],normalized=TRUE)
colnames(norm_data) <- colData(dds)[,DESIGN]
write.table(norm_data,file=paste0(FileName, "_Norm_Data.txt"), sep="\t", quote=FALSE)
write.table(FlagSpike,file=paste0(FileName, "_FlaggedSpikes.txt"), sep="\t", quote=FALSE)
DEsamples <<- subset(res,res$padj < pAdjValue)
write.table(DEsamples,file=paste0(FileName,"_DEG_table.txt"), sep="\t", quote=FALSE)
DEspikes <<- DEsamples[rownames(DEsamples)%in%NameRows,]
write.table(DEspikes,file=paste0(FileName,"_DEspikes_table.txt"), sep="\t",quote=FALSE)
resList[[x]] <- res
if (R_ODAF_plots==TRUE) {
print("creating Read count Plots")
# top DEGs
plotdir <- file.path(paths$DEG_output, "plots")
if(!dir.exists(plotdir)) {dir.create(plotdir, recursive = TRUE)}
barplot.dir <- file.path(paths$DEG_output, "plots", "/barplot_genes/")
if(!dir.exists(barplot.dir)) {dir.create(barplot.dir, recursive = TRUE)}
TOPbarplot.dir <- file.path(barplot.dir, "Top_DEGs")
if(!dir.exists(TOPbarplot.dir)) {dir.create(TOPbarplot.dir, recursive = TRUE)}
setwd(TOPbarplot.dir)
draw.barplots(DEsamples, "top", 20) # (DEsamples, top_bottom, NUM)
print("Top 20 DEG plots done")
# Spurious spikes
SPIKEbarplot.dir <- file.path(barplot.dir, "DE_Spurious_spikes")
if(!dir.exists(SPIKEbarplot.dir)) {dir.create(SPIKEbarplot.dir, recursive = TRUE)}
setwd(SPIKEbarplot.dir)
draw.barplots(DEspikes, "top", nrow(DEspikes)) # (DEsamples, top_bottom, NUM)
print("All DE_Spurious_spike plots done")
}
}
## [1] "female_2 vs female_0 : 2020_Flame_retardants_female_DESeq2_RNA-Seq"
## [1] "Filtering genes: 75% of at least 1 group need to be above 1 CPM"
## [1] "AND"
## [1] "Detecting spurious spikes: Max-Median > Sum/(Rep+1)"
## [1] "Obtaining the DESeq2 results"
## [1] "female_1000 vs female_0 : 2020_Flame_retardants_female_DESeq2_RNA-Seq"
## [1] "Filtering genes: 75% of at least 1 group need to be above 1 CPM"
## [1] "AND"
## [1] "Detecting spurious spikes: Max-Median > Sum/(Rep+1)"
## [1] "Obtaining the DESeq2 results"
## [1] "female_5000 vs female_0 : 2020_Flame_retardants_female_DESeq2_RNA-Seq"
## [1] "Filtering genes: 75% of at least 1 group need to be above 1 CPM"
## [1] "AND"
## [1] "Detecting spurious spikes: Max-Median > Sum/(Rep+1)"
## [1] "Obtaining the DESeq2 results"
The code above (not shown by default) runs the R-ODAF spurious spike detection and outputs the DESeq Results objects as a list for each contrast. As specified by the R-ODAF guidelines, 75% of at least 1 group need to be above 1 CPM and spurious spikes were removed in which Max-Median > Sum/(Rep+1).
The log2FoldChange shrinkage procedure used was ashr. An alpha of 0.1 was used to extract raw results, which are reported as the Wald test p-value. To account for multiple testing, fdr adjusted p-values are reported. Cook’s cutoff was set to FALSE in this analysis.
#listEnsembl()
#listMarts()
#listDatasets(useMart('ensembl'))
ensembl <- useMart("ensembl", dataset = ensembl_species, host = "useast.ensembl.org")
#listAttributes(ensembl)
#listFilters(ensembl)
genes <- row.names(resList[[1]])
#genes_all <- unique(row.names(assay(dds)))
if (file.exists(file.path(paths$RData,"id_table.Rdata"))) {
print(paste("Already found ID table; skipping biomaRt query and loading from disk."))
load(file.path(paths$RData,"id_table.Rdata"))
id_table <- id_table_entrez[,1:3]
} else {
id_table_entrez <- getBM(filters=biomart_filter,
attributes= c("ensembl_gene_id",
"external_gene_name", # mgi_symbol for Mouse
"description",
"entrezgene_id",
"entrezgene_accession"),
values=genes,
mart=ensembl)
save(id_table_entrez, file=file.path(paths$RData,"id_table.Rdata"))
id_table <- id_table_entrez[,1:3]
}
## [1] "Already found ID table; skipping biomaRt query and loading from disk."
# genes_entrezid <- dplyr::left_join(data.frame(genes), id_table_entrez, by=c("genes" = biomart_filter)) # Can I remove this? I think so
sigtabList <- list()
alltablList <- list()
for (i in 1:length(resList)) {
print(i)
sigTab <- resList[[i]]
# Add taxonomy
if (nrow(sigTab) == 0) {
next
} else {
sigTab <- cbind(genes=row.names(resList[[i]]),
as(sigTab, "data.frame"),
contrast = gsub(pattern = paste0("log2.*", DESIGN, "\ "),
replacement = "",
x = resList[[i]]@elementMetadata[[2]][2]))
names(sigTab)[names(sigTab) == "genes"] <- biomart_filter
sigTab <- dplyr::left_join(sigTab,
id_table,
by=biomart_filter)
sigTab <- dplyr::mutate(sigTab, linearFoldChange=ifelse(log2FoldChange > 0,
2 ^ log2FoldChange,
-1 / (2 ^ log2FoldChange)))
#sigTab <- sigTab[,c(1,9,10,2,3,11,5:8)] # Reorder columns
sigTab <- sigTab[,c(1,8,9,2,3,10,4:7)]
alltablList[[i]] <- sigTab
sigTab <- sigTab[!is.na(sigTab$padj) & sigTab$padj < alpha & abs(sigTab$linearFoldChange) > linear_fc_filter, ] ## FILTERS!
sigtabList[[i]] <- sigTab %>% dplyr::distinct()
}
}
## [1] 1
## [1] 2
## [1] 3
# Write dataframe of all results
# Significant only
sigtabList <- sigtabList[!sapply(sigtabList, is.null)]
significantResults <- rbindlist(sigtabList)
significantResults <- dplyr::distinct(significantResults)
significantResults$contrast <- factor(significantResults$contrast)
# All results including non-significant
allResults <- rbindlist(alltablList)
# All Results for Plotting
# Replace NA gene symbols with ensembl ID
allResults$external_gene_name[is.na(allResults$external_gene_name)] <- allResults$ensembl_gene_id[is.na(allResults$external_gene_name)]
# Replace blank gene symbols with ensembl ID
allResults$external_gene_name[allResults$external_gene_name == ""] <- allResults$ensembl_gene_id[allResults$external_gene_name == ""]
IPA <- allResults %>%
distinct() %>%
pivot_wider(names_from = contrast, values_from = c(log2FoldChange,linearFoldChange,lfcSE,pvalue,padj))
allResults$padj[allResults$padj == 0] <- 10^-100 # For plotting purposes!
allResultsOrdered_logFC_filter <- dplyr::filter(allResults, abs(linearFoldChange) > 1.5) %>%
arrange(-abs(linearFoldChange))
allResultsOrdered_logFC_filter <- dplyr::filter(allResultsOrdered_logFC_filter, padj < alpha)
res.df <- allResultsOrdered_logFC_filter
degTable <- significantResults %>%
dplyr::group_by(contrast) %>%
dplyr::count()
lengths <- lapply(resList, nrow)
longest <- which.max(lengths)
summaryTable <- data.frame( genes=row.names(resList[[longest]]),
baseMean=resList[[longest]]$baseMean )
names(summaryTable)[names(summaryTable) == "genes"] <- biomart_filter
contrastsInSummary <- vector()
for (i in 1:length(resList)) {
print(i)
n <- resList[[i]]@elementMetadata[[2]][2]
n <- gsub(pattern = paste0("log2\ fold\ change\ \\(MMSE\\):\ ",DESIGN), # Might need to be MLE for some tests
replacement = paste0("log2 Fold Change"),
x = resList[[i]]@elementMetadata[[2]][2])
p <- gsub(pattern = paste0("log2\ fold\ change\ \\(MMSE\\):\ ",DESIGN),
replacement = resList[[i]]@elementMetadata[[2]][6], # Not used?
x = resList[[i]]@elementMetadata[[2]][2])
q <- gsub(pattern = paste0("log2\ fold\ change\ \\(MMSE\\):\ ",DESIGN,"\ "),
replacement = "",
x = resList[[i]]@elementMetadata[[2]][2])
message(n)
message(p)
message(q)
toJoin <- as.data.frame(resList[[i]])
setDT(toJoin, keep.rownames = T)[]
setnames(toJoin, 1, biomart_filter)
toJoin <- mutate(toJoin, linearFoldChange=ifelse(log2FoldChange > 0,
2 ^ log2FoldChange,
-1 / (2 ^ log2FoldChange)))
toJoin <- toJoin[,c(1:3,7,4:6)]
summaryTable <- dplyr::left_join(summaryTable, dplyr::select(toJoin, !c(baseMean,pvalue,lfcSE)), by=biomart_filter)
names(summaryTable)[[ncol(summaryTable)-2]] <- paste0("log2FoldChange_",i)
names(summaryTable)[[ncol(summaryTable)-1]] <- paste0("linearFoldChange_",i)
names(summaryTable)[[ncol(summaryTable)]] <- paste0("FDR_",i)
contrastsInSummary[i] <- q
print(summary(resList[[i]], pAdjValue))
}
## [1] 1
##
## out of 10666 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 9, 0.084%
## LFC < 0 (down) : 3, 0.028%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] 2
##
## out of 10666 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 51, 0.48%
## LFC < 0 (down) : 24, 0.23%
## outliers [1] : 0, 0%
## low counts [2] : 621, 5.8%
## (mean count < 11)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
## [1] 3
##
## out of 10701 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 74, 0.69%
## LFC < 0 (down) : 61, 0.57%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## NULL
maxFCs <- allResults %>%
dplyr::group_by(!!sym(biomart_filter)) %>%
dplyr::filter(abs(linearFoldChange) == max(abs(linearFoldChange))) %>%
dplyr::ungroup() %>%
dplyr::select(!!sym(biomart_filter), linearFoldChange)
minPvals <- allResults %>%
group_by(!!sym(biomart_filter)) %>%
dplyr::filter(padj == min(padj)) %>%
dplyr::ungroup() %>%
dplyr::select(!!sym(biomart_filter), padj)
summaryTable <- summaryTable %>%
left_join(id_table, by=biomart_filter) %>%
left_join(maxFCs, by=biomart_filter) %>%
left_join(minPvals, by=biomart_filter) %>%
dplyr::rename(maxFoldChange = linearFoldChange,
minFDR_pval = padj) %>%
dplyr::distinct() %>%
mutate(maxFoldChange=abs(maxFoldChange)) # This eliminates the direction of change: this way it's easy to sort.
numColsToPrepend <- ncol(summaryTable) - 3*length(resList) - 2 # Number of columns per contrast = 3. Subtract two for the baseMean and genes columns.
colPositionsToPrependSTART <- ncol(summaryTable) - numColsToPrepend + 1
colPositionsOfData <- ncol(summaryTable) - numColsToPrepend
summaryTable <- summaryTable[,c(1,
colPositionsToPrependSTART:ncol(summaryTable),
2:colPositionsOfData)]
CPMddsDF <- data.frame(genes = row.names(CPMdds), CPMdds, check.names=F)
CPMddsDF <- dplyr::left_join(CPMddsDF, id_table, by=c("genes" = biomart_filter))
numColsToPrepend <- ncol(CPMddsDF) - ncol(CPMdds) - 1
colPositionsToPrependSTART <- ncol(CPMddsDF) - numColsToPrepend + 1
colPositionsOfData <- ncol(CPMddsDF) - numColsToPrepend
CPMddsDF <- CPMddsDF[,c(1,colPositionsToPrependSTART:ncol(CPMddsDF),2:colPositionsOfData)]
The code above (not shown by default) generates tables summarizing the differentially expressed genes (DEGs). A linear fold change cutoff of 1.5 and adjusted p-value of 0.05 was used to filter the results.
Here is the number of DEGs in each group:
kable(degTable,
caption="Number of differentially expressed genes across each contrast") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| contrast | n |
|---|---|
| female_1000 vs female_0 | 25 |
| female_2 vs female_0 | 6 |
| female_5000 vs female_0 | 45 |
#######################################
### Write results table from DESeq2
#######################################
setwd(paths$DEG_output)
write.table(allResults, file=paste0(NORM_TYPE,"-DESeq_output_ALL.txt"), quote=F, sep='\t', col.names=NA)
write.table(significantResults, file=paste0(NORM_TYPE,"-DESeq_output_significant.txt"), quote=F, sep='\t', col.names=NA)
write.table(summaryTable, file=paste0(NORM_TYPE,"-DESeq_output_all_genes.txt"), quote=F, sep='\t', col.names=NA)
write.table(CPMddsDF, file=paste0(NORM_TYPE,"-Per_sample_CPM.txt"), quote=F, sep='\t', col.names=NA)
write.table(Counts, file=paste0(NORM_TYPE,"-Per_sample_normalized_counts.txt"), quote=F, sep='\t', col.names=NA)
The code above (not shown by default) writes text files for each DEG summary type.
setwd(paths$DEG_output)
#######################################
### Write results above but in Excel
#######################################
### Global options
options("openxlsx.borderColour" = "#4F80BD")
options("openxlsx.borderStyle" = "thin")
options("openxlsx.maxWidth" = 50)
hs1 <- createStyle(textDecoration = "Bold",
border = "Bottom",
fontColour = "black")
hs2 <- createStyle(textDecoration = "Bold",
border = c("top", "bottom", "left", "right"),
fontColour = "black",
fgFill="#C5D9F1")
### Summary results - one gene per line, columns are contrasts
wb1 <- createWorkbook()
modifyBaseFont(wb1, fontSize = 10, fontName = "Arial Narrow")
addWorksheet(wb1, "DESeq_results_per_gene")
for (j in 1:length(contrastsInSummary)) {
myStartcol=7+((j-1)*3)
myEndcol=9+((j-1)*3)
mergeCells(wb1,
sheet = 1,
cols = myStartcol:myEndcol,
rows = 1)
writeData(
wb1,
sheet = 1,
x = contrastsInSummary[j],
startCol = myStartcol,
startRow = 1)
}
conditionalFormatting(wb1,
sheet = 1,
rows = 1,
cols = 1:ncol(summaryTable),
type = "contains",
rule = "",
style=hs2)
freezePane(wb1, sheet = 1, firstActiveRow = 3, firstActiveCol = 4)
writeDataTable(wb1,
sheet = 1,
startRow = 2,
x = summaryTable,
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb1, sheet = 1, cols = 1:6, widths = "auto") # This is hard-coded, so prone to error; will only impact auto adjustment of col widths.
setColWidths(wb1, sheet = 1, cols = 7:ncol(summaryTable), widths = 13) # This is hard-coded, so prone to error; will only impact auto adjustment of col widths.
fname1 <- paste0("1.",NORM_TYPE,"-DESeq_by_gene.xlsx")
saveWorkbook(wb1, fname1, overwrite = TRUE)
### All results in one table
wb2 <- createWorkbook()
modifyBaseFont(wb2, fontSize = 10, fontName = "Arial Narrow")
addWorksheet(wb2, paste0("FDR",pAdjValue,".Linear.FC.",linear_fc_filter))
freezePane(wb2, sheet = 1, firstRow = TRUE, firstActiveCol = 4)
writeDataTable(wb2,
sheet = 1,
x = significantResults,
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb2, sheet = 1, cols = 1:ncol(significantResults), widths = "auto")
addWorksheet(wb2, "DESeq_all_results")
freezePane(wb2, sheet = 2, firstRow = TRUE, firstActiveCol = 4)
writeDataTable(wb2,
sheet = 2,
x = allResults,
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb2, sheet = 2, cols = 1:ncol(allResults), widths = "auto")
saveWorkbook(wb2, paste0("2.",NORM_TYPE,"-DESeq_all.xlsx"), overwrite = TRUE)
### All results with different tabs for each contrast
wb3 <- createWorkbook()
modifyBaseFont(wb3, fontSize = 10, fontName = "Arial Narrow")
short_contrast_names <- stringr::str_trunc(short_contrast_names, 30, side="center")
for (i in 1:length(levels(factor(allResults$contrast)))) {
print(i)
dataToWrite <- allResults[allResults$contrast==levels(factor(allResults$contrast))[i],]
addWorksheet(wb3, short_contrast_names[i])
freezePane(wb3, sheet = i, firstRow = TRUE, firstActiveCol = 4)
writeDataTable(wb3,
sheet = i,
x = dataToWrite,
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb3, sheet = i, cols = 1:ncol(dataToWrite), widths = "auto")
}
## [1] 1
## [1] 2
## [1] 3
saveWorkbook(wb3, paste0("3.",NORM_TYPE,"-DESeq_by_contrast.xlsx"), overwrite = TRUE)
### CPM
wb4 <- createWorkbook()
modifyBaseFont(wb4, fontSize = 10, fontName = "Arial Narrow")
addWorksheet(wb4, "Counts per million")
freezePane(wb4, sheet = 1, firstRow = TRUE, firstActiveCol = 4)
writeDataTable(wb4,
sheet = 1,
x = as.data.frame(CPMddsDF),
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb4, sheet = 1, cols = 1:ncol(CPMddsDF), widths = "auto")
saveWorkbook(wb4, paste0("4.",NORM_TYPE,"-CPM.xlsx"), overwrite = TRUE)
### IPA
wb5 <- createWorkbook()
modifyBaseFont(wb5, fontSize = 10, fontName = "Arial Narrow")
addWorksheet(wb5, "For IPA upload")
freezePane(wb5, sheet = 1, firstRow = TRUE, firstActiveCol = 5)
writeDataTable(wb5,
sheet = 1,
x = as.data.frame(IPA),
colNames = TRUE,
rowNames = F,
tableStyle = "none",
headerStyle = hs1,
keepNA = T,
na.string = "NA")
setColWidths(wb5, sheet = 1, cols = 1:ncol(IPA), widths = "auto")
saveWorkbook(wb5, paste0("5.",NORM_TYPE,"-IPA.xlsx"), overwrite = TRUE)
The code above (not shown by default) writes Excel workbooks and text files of DEG lists.
These files should be provided to you as a separate zip file.
setwd(paths$DEG_output)
xfun::embed_files(list.files(".", "[.](xlsx)$"), name = "RNASeq_Spreadsheets.zip")
setwd(paths$DEG_output)
xfun::embed_files(list.files(".", "[.](txt)$"), name = "RNASeq_Text_Files.zip")
setwd(paths$DEG_output)
xfun::embed_dir(ODAFdir, name = "RNASeq_R-ODAF_text_files.zip")
# Save RData here if flag is TRUE.
if(is.na(params$group_facet)) {
save.image(file = file.path(paths$RData, "Partial_analysis.RData"))
} else {
save.image(file = file.path(paths$RData, paste0("Partial_analysis_", params$group_filter, ".RData")))
}
# Add a flag to the start of the script with if statements in all code chunks to check if the flag is set.
# If flag is TRUE, run analysis; if flag is FALSE, SKIP analysis and continue here (just to modify plotting output).
## Perform PCA analysis and make plot
plotPCA(rld, intgroup = params$intgroup, ntop=nrow(assay(rld))) +
theme(legend.position = "bottom") +
guides(fill=guide_legend(ncol=3))
## Get percent of variance explained
data_pca <- plotPCA(rld, intgroup = params$intgroup, ntop=nrow(assay(rld)), returnData = TRUE)
percentVar <- round(100 * attr(data_pca, "percentVar"))
This plot shows the first two principal components that explain the variability in the data using the regularized log count data. If you are unfamiliar with principal component analysis, you might want to check the Wikipedia entry or this interactive explanation. In this case, the first and second principal component explain 10 and 8 percent of the variance respectively.
rld_degs <- rld[row.names(assay(rld)) %in% significantResults[[biomart_filter]],] # d[ d[[THECOLUMN]] == someValue , ]
## Perform PCA analysis and make plot
plotPCA(rld_degs, intgroup = params$intgroup, ntop=nrow(rld_degs)) +
theme(legend.position = "bottom") +
guides(fill=guide_legend(ncol=3))
PCAplotDEGs <- plotPCA(rld_degs, intgroup = params$intgroup, ntop=nrow(rld_degs), returnData = TRUE)
## Get percent of variance explained
percentVarDEGs <- round(100 * attr(PCAplotDEGs, "percentVar"))
This plot shows the principal components analysis limited to all DEGs. In this case, the first and second principal component explain 52 and 11 percent of the variance respectively.
# Run this code only once for both the PCA and clustering analysis
rv = rowVars(assay(rld))
select = order(rv, decreasing = TRUE)[1:nBest]
rld_top <- rld[select,]
## Perform PCA analysis and make plot
plotPCA(rld_top, intgroup = params$intgroup, ntop=nrow(rld_top)) +
theme(legend.position = "bottom") +
guides(fill=guide_legend(ncol=3))
## Get percent of variance explained
data_pcaTop <- plotPCA(rld_top, intgroup = params$intgroup, returnData = TRUE, ntop=nrow(rld_top))
percentVarTop <- round(100 * attr(data_pcaTop, "percentVar"))
This plot shows the principal components analysis limited to all DEGs. In this case, the first and second principal component explain 28 and 11 percent of the variance respectively.
## Obtain the sample euclidean distances
sampleDists <- stats::dist(t(assay(rld)))
sampleDistMatrix <- as.matrix(sampleDists)
## Add names based on intgroup
rownames(sampleDistMatrix) <- apply(as.data.frame(colData(rld)[, params$design]), 1,
paste, collapse = ' : ')
colnames(sampleDistMatrix) <- NULL
## Define colors to use for the heatmap
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
## Make the heatmap
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists, color = colors)
This plot shows how samples are clustered based on their euclidean distance using the regularized log transformed count data. This figure gives an overview of how the samples are hierarchically clustered. It is a complementary figure to the PCA plot.
## Limit to the top n genes
# rv = rowVars(assay(rld))
# select = order(rv, decreasing = TRUE)[1:nBest]
## Obtain the sample euclidean distances
# sampleDistsTop <- dist(t(assay(rld)[select,]))
sampleDistsDEGs <- stats::dist(t(assay(rld_degs)))
# sampleDistMatrixTop <- as.matrix(sampleDistsTop)
sampleDistMatrixDEGs <- as.matrix(sampleDistsDEGs)
## Add names based on intgroup
rownames(sampleDistMatrixDEGs) <- apply(as.data.frame(colData(rld)[, params$design]), 1,
paste, collapse = ' : ')
colnames(sampleDistMatrixDEGs) <- NULL
## Define colors to use for the heatmap
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
## Make the heatmap
pheatmap(sampleDistMatrixDEGs,
clustering_distance_rows = sampleDistsDEGs,
clustering_distance_cols = sampleDistsDEGs,
color = colors)
This plot shows how samples are clustered based on their euclidean distance using the regularized log transformed count data of all DEGs. This figure gives an overview of how the samples are hierarchically clustered. It is a complementary figure to the PCA plot.
## Limit to the top n genes
## Using the select object from PCA code above...
## Obtain the sample euclidean distances
sampleDistsTop <- stats::dist(t(assay(rld)[select,]))
sampleDistMatrixTop <- as.matrix(sampleDistsTop)
## Add names based on intgroup
rownames(sampleDistMatrixTop) <- apply(as.data.frame(colData(rld)[, params$design]), 1,
paste, collapse = ' : ')
colnames(sampleDistMatrixTop) <- NULL
## Define colors to use for the heatmap
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
## Make the heatmap
pheatmap(sampleDistMatrixTop,
clustering_distance_rows = sampleDistsTop,
clustering_distance_cols = sampleDistsTop,
color = colors)
This plot shows how samples are clustered based on their euclidean distance using the regularized log transformed count data of the top 100 most variable genes. This figure gives an overview of how the samples are hierarchically clustered. It is a complementary figure to the PCA plot.
mat <- assay(rld)[row.names(assay(rld)) %in% significantResults[[biomart_filter]],]
mat <- mat - rowMeans(mat)
heatmap_df <- as.data.frame(colData(rld)[,params$intgroup])
row.names(heatmap_df) <- colData(rld)$original_names # Customize!
pheatmap(mat,
annotation_col=heatmap_df,
show_rownames = FALSE,
border_color = NA,
scale="row")
# color = inferno(10)
mat_top <- assay(rld)[row.names(assay(rld)) %in% allResultsOrdered_logFC_filter[1:nHeatmapDEGs,][[biomart_filter]],]
mat_top <- mat_top - rowMeans(mat_top)
genes_for_heatmap <- row.names(mat_top)
genes_for_heatmap <- data.frame(genes=row.names(mat_top))
names(genes_for_heatmap)[names(genes_for_heatmap) == "genes"] <- biomart_filter
genes_for_heatmap <- dplyr::left_join(genes_for_heatmap, id_table_entrez,
by=biomart_filter) %>%
dplyr::distinct()
pheatmap(mat_top,
annotation_col=heatmap_df,
labels_row=genes_for_heatmap[["external_gene_name"]],
show_rownames = TRUE,
border_color = NA,
scale="row")
# color = inferno(10)
rv = rowVars(assay(rld))
select = order(rv, decreasing = TRUE)[1:nHeatmap]
matRV <- assay(rld)[select,]
matRV <- matRV - rowMeans(matRV)
genes_for_heatmap <- row.names(matRV)
genes_for_heatmap <- data.frame(genes=row.names(matRV))
names(genes_for_heatmap)[names(genes_for_heatmap) == "genes"] <- biomart_filter
genes_for_heatmap <- dplyr::left_join(genes_for_heatmap, id_table_entrez,
by=biomart_filter) %>%
dplyr::distinct()
pheatmap(matRV,
#color = rev(brewer.pal(11,"RdBu")), # inferno(10),
annotation_col=heatmap_df,
labels_row=genes_for_heatmap[["external_gene_name"]],
border_color = NA,
scale = "row",
cutree_rows = 3,
cutree_cols = 4)
This section contains three groups of MA plots (see Wikipedia) that compare the mean of the normalized counts against the log fold change. Each of the groups has a tab for each contrast. The plots show one point per feature. The points are shown in red if the feature has an adjusted p-value less than the cutoff listed in each section, that is, the statistically significant features are shown in red.
This group of plots shows alpha = 0.1, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
## MA plot with alpha used in DESeq2::results()
for (i in seq_along(resList)) {
contrast = gsub(pattern = paste0("log2.*", DESIGN, "\ "),
replacement = "",
x = resList[[i]]@elementMetadata[[2]][2])
cat("###", contrast, " \n\n")
MA_title <- paste0('MA plot, alpha = ', metadata(resList[[i]])$alpha,', ',contrast)
DESeq2::plotMA(resList[[i]], alpha = metadata(resList[[i]])$alpha, main = str_wrap(MA_title, width=40))
cat(' \n\n')
}
## Used to have multiple tabsets for MA plots... it was too many MA plots.
## MA plot with alpha = 1/2 of the alpha used in DESeq2::results()
# for (i in 1:length(resList)) {
# contrast = gsub(pattern = "log2.*Time\ ",
# replacement = "",
# x = resList[[i]]@elementMetadata[[2]][2])
# cat("###", contrast, " \n")
# DESeq2::plotMA(resList[[i]], alpha = metadata(resList[[i]])$alpha / 2,
# main = paste('MA plot with alpha =', metadata(resList[[i]])$alpha / 2,',',contrast))
# cat('\n\n')
# }
## MA plot with alpha corresponding to the one that gives the nBest features
# for (i in 1:length(resList)) {
# nBest.actual <- min(nBest, nrow(head(resList[[i]], n = nBest)))
# nBest.alpha <- head( resList[[i]][order(resList[[i]]$pvalue),], n = nBest)$padj[nBest.actual]
# contrast = gsub(pattern = "log2.*Time\ ",
# replacement = "",
# x = resList[[i]]@elementMetadata[[2]][2])
# cat("###", contrast, " \n")
# DESeq2::plotMA(resList[[i]], alpha = nBest.alpha * 1.00000000000001,
# main = paste('MA plot for top', nBest.actual, 'features',',',contrast))
# cat('\n\n')
# }
## P-value histogram plot
ggplot(allResults, aes(x = pvalue)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title='Histogram of unadjusted p-values') +
xlab('Unadjusted p-values') +
facet_wrap( ~ contrast, ncol = 2)
This plot shows a histogram of the unadjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples. The shape depends on the percent of features that are differentially expressed. For further information on how to interpret a histogram of p-values check David Robinson’s post on this topic.
## P-value distribution summary
summary(allResults$pvalue)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.1865 0.4421 0.4577 0.7148 0.9999
This is the numerical summary of the distribution of the p-values.
## Split features by different p-value cutoffs
pval_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(allResults$pvalue <= x, na.rm = TRUE))
})
pval_table <- do.call(rbind, pval_table)
kable(pval_table, format = 'markdown', align = c('c', 'c'))
| Cut | Count |
|---|---|
| 0.0001 | 174 |
| 0.0010 | 361 |
| 0.0100 | 1130 |
| 0.0250 | 1983 |
| 0.0500 | 3163 |
| 0.1000 | 5183 |
| 0.2000 | 8750 |
| 0.3000 | 11976 |
| 0.4000 | 15220 |
| 0.5000 | 18301 |
| 0.6000 | 21360 |
| 0.7000 | 24425 |
| 0.8000 | 27300 |
| 0.9000 | 30224 |
| 1.0000 | 33182 |
This table shows the number of features with p-values less or equal than some commonly used cutoff values.
## Adjusted p-values histogram plot
ggplot(allResults, aes(x = padj)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title=paste('Histogram of', elementMetadata(resList[[1]])$description[grep('adjusted', elementMetadata(resList[[1]])$description)])) +
xlab('Adjusted p-values') +
xlim(c(0, 1.0005)) +
facet_wrap( ~ contrast, ncol = 2, scales="free")
## Warning: Removed 633 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_bar).
This plot shows a histogram of the fdr adjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples.
## Adjusted p-values distribution summary
summary(res.df$padj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.000000 0.000014 0.001575 0.001105 0.021004
This is the numerical summary of the distribution of the fdr adjusted p-values.
## Split features by different adjusted p-value cutoffs
padj_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(res.df$padj <= x, na.rm = TRUE))
})
padj_table <- do.call(rbind, padj_table)
kable(padj_table, format = 'markdown', align = c('c', 'c'))
| Cut | Count |
|---|---|
| 0.0001 | 62 |
| 0.0010 | 77 |
| 0.0100 | 101 |
| 0.0250 | 105 |
| 0.0500 | 105 |
| 0.1000 | 105 |
| 0.2000 | 105 |
| 0.3000 | 105 |
| 0.4000 | 105 |
| 0.5000 | 105 |
| 0.6000 | 105 |
| 0.7000 | 105 |
| 0.8000 | 105 |
| 0.9000 | 105 |
| 1.0000 | 105 |
This table shows the number of features with fdr adjusted p-values less or equal than some commonly used cutoff values.
This table shows the significant DEGs (passing all filtering criteria) ordered by their absolute fold change. Use the search function to find your feature of interest or sort by one of the columns. You can limit to a single contrast if desired.
searchURL <- "http://www.ncbi.nlm.nih.gov/gene/?term="
## Add search url if appropriate
res.df.dt <- res.df
if(!is.null(searchURL)) {
res.df.dt$ensembl_gene_id <- paste0('<a href="',
searchURL,
res.df.dt$ensembl_gene_id,
'" rel="noopener noreferrer" target="_blank">',
res.df.dt$ensembl_gene_id,
'<br/>',
res.df.dt$external_gene_name,
'</a>')
}
res.df.dt[, 'padj'] <- format(res.df.dt[, 'padj'], scientific = TRUE, digits=digits)
res.df.dt[, 'pvalue'] <- format(res.df.dt[, 'pvalue'], scientific = TRUE, digits=digits)
res.df.dt <- res.df.dt %>% dplyr::select(-c(description, external_gene_name))
DT::datatable(res.df.dt,
options = list(pagingType='full_numbers',
pageLength=20,
scrollX='100%',
dom = 'Bfrtip',
buttons = c('copy',
'csv',
'excel',
'pdf',
'print',
'colvis'),
columnDefs = list(list(visible=FALSE, targets=c(2,4,5)))),
escape = FALSE,
extensions = 'Buttons',
rownames = FALSE,
filter = "top",
colnames = c('Gene' = 'ensembl_gene_id')) %>%
DT::formatRound(which(!colnames(res.df.dt) %in% c('pvalue',
'padj',
'Feature',
'contrast',
'description',
'ensembl_gene_id',
'external_gene_name')),
digits)
This section contains plots showing the normalized counts per sample for each group of interest. Only the best 20 features are shown, ranked by their absolute fold change values. The Y axis is on the log10 scale and the feature name is shown in the title of each plot.
plotCounts_gg <- function(i, dds, intgroup) {
plotdata <- plotCounts(dds,
gene=i,
intgroup=params$intgroup,
returnData = TRUE)
plot_title <- paste(id_table$external_gene_name[id_table[[biomart_filter]] == i],
id_table$ensembl_gene_id[id_table[[biomart_filter]] == i])
if(ncol(plotdata) > 2) {
colorCol = 3
} else {colorCol = 2}
ggplot(plotdata, aes(x = plotdata[,2], y = plotdata[,1], color = plotdata[,colorCol])) +
geom_point(position = position_jitterdodge()) +
ylab('Normalized count') +
xlab('Group') +
ggtitle(plot_title) +
coord_trans(y = "log10") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(color = colnames(plotdata)[colorCol])
}
genesToPlot <- significantResults %>% arrange(-abs(log2FoldChange))
for(i in head(unique(genesToPlot[[biomart_filter]]), nBestFeatures)) {
print(plotCounts_gg(i, dds = dds, intgroup = params$intgroup))
}
This section shows genes of interest sorted by those with highest fold-change within each contrast.
#numResults=20
numResults <- nBestFeatures
allResultsOrdered_logFC_filter %>%
group_by(contrast) %>%
top_n(numResults, wt=abs(log2FoldChange)) %>%
ungroup() %>%
mutate(contrast=as.factor(contrast),
external_gene_name=reorder_within(external_gene_name,log2FoldChange, contrast)) %>%
ggplot(aes(x=log2FoldChange,
y=external_gene_name,
color=contrast,
size=-log(padj))) +
geom_point(show.legend = TRUE) +
facet_wrap(~contrast,
scales="free_y",
ncol=4,
labeller = labeller(contrast = label_wrap_gen(10))) +
scale_y_reordered() +
geom_vline(xintercept=0,
linetype="dashed",
color = "black",
size=1) +
ggtitle(paste0("Top ",
numResults,
" genes ranked by fold change (adjusted p-value <",
alpha,
"), grouped by treatment"))
This section shows a volcano plot for each contrast.
Note that scales are set manually for this plot: therefore, there may be data points outside the range shown (see warnings).
Data reported as significant are shown as red points.
ggplot(allResults, aes(x=log2FoldChange, y=-log10(padj))) +
geom_point(size=0.5, alpha=0.4) +
geom_point(data=significantResults, size=1.5, alpha=1, color="red") +
facet_wrap(~contrast, ncol=4) + # , scales="free"
geom_vline(xintercept=c(-1.5,1.5), color="red", alpha=1.0)+
geom_hline(yintercept=-log10(0.05), color="blue", alpha=1.0) +
scale_x_continuous(name = "log2 Fold Change", limits = c(-5,5)) + #
scale_y_continuous(name = "-log10 adjusted p-value", limits = c(0,6)) #
## Warning: Removed 672 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_point).
###########################################################################################
######## PRODUCE INPUT FOR BMDExpress2 ####################################################
###########################################################################################
# Run this code conditional on whether there is a column matching the word dose in the metadata
if(any(grepl(x=colnames(DESeqDesign), pattern="dose", ignore.case = T))){
doseCol <- grep(x=colnames(DESeqDesign), pattern="dose", ignore.case = T)
# Create input file...
lognorm.read.counts <- as.data.frame(counts(dds, normalized=TRUE))
# rld normalized, size factor normalized, rounded to 4 significant figures
bmdexpress <- fastbmd <- as.data.frame(lognorm.read.counts)
bmdexpress <- cbind(SampleID=row.names(bmdexpress), bmdexpress, stringsAsFactors=F)
bmdexpress <- rbind( Dose=c("#CLASS:DOSE", as.numeric(DESeqDesign[,doseCol])), bmdexpress, stringsAsFactors=F)
# Create input for fastbmd also
fastbmd <- cbind(SampleID=row.names(fastbmd), fastbmd, stringsAsFactors=F)
# Optionally... you can add a second variable name (e.g., chemical) for fastbmd only
# fastbmd <- rbind( Chemical=c("#CLASS:CHEMICAL",as.character(DESeqDesign[DESeqDesign$original_names %in% colnames(fastbmd),]$chemical)),
# fastbmd,
# stringsAsFactors=F)
fastbmd <- rbind( Dose=c("#CLASS:DOSE",format(DESeqDesign[DESeqDesign$original_names %in% colnames(fastbmd),]$dose, scientific=F)),
fastbmd,
stringsAsFactors=F)
if(!is.na(params$group_facet)) {
write.table(bmdexpress,
file=file.path(paths$DEG_output, paste0("bmdexpress_input_", params$group_filter, ".txt")),
quote = F,
sep = "\t",
row.names = F,
col.names = T)
write.table(fastbmd,
file=file.path(paths$DEG_output, paste0("fastbmd_input_", params$group_filter, ".txt")),
quote = F,
sep = "\t",
row.names = F,
col.names = T)
} else {
write.table(bmdexpress,
file=file.path(paths$DEG_output, "bmdexpress_input.txt"),
quote = F,
sep = "\t",
row.names = F,
col.names = T)
write.table(fastbmd,
file=file.path(paths$DEG_output, "fastbmd_input.txt"),
quote = F,
sep = "\t",
row.names = F,
col.names = T)
}
}
This section performs GO enrichment on all DEGs passing filters. The background set of genes is those that were identified in this sequencing experiment. The clusterProfiler package is used for this analysis.
## Remember that dds had ENSEMBL ids for the genes
# ensemblNames <- gsub("\\..*", "", rownames(dds))
ensemblDEGs <- significantResults[[biomart_filter]]
head(ensemblDEGs)
## [1] "ENSRNOG00000002946" "ENSRNOG00000013090" "ENSRNOG00000020775" "ENSRNOG00000032560" "ENSRNOG00000033680"
## [6] "ENSRNOG00000051171"
DEGs <- dplyr::left_join(data.frame(genes=ensemblDEGs), id_table_entrez, by=c("genes" = biomart_filter))
head(DEGs)
## genes external_gene_name
## 1 ENSRNOG00000002946 Socs3
## 2 ENSRNOG00000013090 Gadd45g
## 3 ENSRNOG00000020775 Cyp2b2
## 4 ENSRNOG00000032560 Cyp3a23/3a1
## 5 ENSRNOG00000032560 Cyp3a23/3a1
## 6 ENSRNOG00000032560 Cyp3a23/3a1
## description entrezgene_id
## 1 suppressor of cytokine signaling 3 [Source:RGD Symbol;Acc:621087] 89829
## 2 growth arrest and DNA-damage-inducible, gamma [Source:RGD Symbol;Acc:1311796] 291005
## 3 cytochrome P450, family 2, subfamily b, polypeptide 2 [Source:RGD Symbol;Acc:2467] 361523
## 4 cytochrome P450, family 3, subfamily a, polypeptide 23/polypeptide 1 [Source:RGD Symbol;Acc:628626] 100910877
## 5 cytochrome P450, family 3, subfamily a, polypeptide 23/polypeptide 1 [Source:RGD Symbol;Acc:628626] 266682
## 6 cytochrome P450, family 3, subfamily a, polypeptide 23/polypeptide 1 [Source:RGD Symbol;Acc:628626] 25642
## entrezgene_accession
## 1 Socs3
## 2 Gadd45g
## 3 Cyp2b2
## 4 LOC100910877
## 5 Cyp3a2
## 6 Cyp3a23/3a1
entrezDEGs <- as.character(DEGs$entrezgene_id)
entrezDEGs <- entrezDEGs[!is.na(entrezDEGs)]
#### DEFINE INPUTS
myDEGs <- entrezDEGs
# background <- row.names(assay(dds))
background <- dplyr::left_join(data.frame(genes=row.names(assay(dds))), id_table_entrez, by=c("genes"=biomart_filter)) %>%
dplyr::filter(!is.na(entrezgene_id)) %>%
dplyr::pull(entrezgene_id)
background <- as.character(background)
entrezDEGs <- entrezDEGs[!is.na(entrezDEGs)]
##### TO DO: Add multiple panels for each contrast.
##### Use "all" argument to only run each once..?
## Not all genes have a p-value
table(!is.na(resList[[1]]$padj))
##
## TRUE
## 10666
KeyType <- "ENTREZID" # ENSEMBL? ACCNUM? GID? ENTREZID?
# head(keys(orgdb, keytype="ENTREZID"))
# Run all at once...
# enrich_go_all <- enrichGO(gene = myDEGs,
# universe = background, # All genes in dataset
# OrgDb = orgdb,
# keyType = KeyType,
# readable=T,
# ont = "all",
# pAdjustMethod = "BH",
# pvalueCutoff = 0.01,
# qvalueCutoff = 0.05)
# clusterProfiler::dotplot(enrich_go_all, font.size=9, showCategory=10, split="ONTOLOGY") +
# theme(axis.text.y = element_text(angle = 0)) +
# scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 50)) +
# facet_wrap(~ONTOLOGY)
## Can also do individually...
## Perform enrichment analysis for Biological Process (BP)
enrich_go_bp <- enrichGO(gene = myDEGs,
universe = background, # All genes in dataset
OrgDb = orgdb,
keyType = KeyType,
readable=T,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
enrich_go_mf <- enrichGO(gene = myDEGs,
universe = background, # All genes in dataset
OrgDb = orgdb,
keyType = KeyType,
readable=T,
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
enrich_go_cc <- enrichGO(gene = myDEGs,
universe = background, # All genes in dataset
OrgDb = orgdb,
keyType = KeyType,
ont = "CC",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
if (nrow(enrich_go_bp@result %>% filter(p.adjust < 0.01 & qvalue < 0.05)) > 0) {
plot1 <- clusterProfiler::dotplot(enrich_go_bp, font.size=9, showCategory=20) +
theme(axis.text.y = element_text(angle = 0)) +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 30))
DEGs_full <- dplyr::left_join(significantResults,
id_table_entrez,
by=biomart_filter) %>%
distinct()
foldChanges <- DEGs_full %>% dplyr::pull(log2FoldChange)
names(foldChanges) <- DEGs_full %>% dplyr::pull(entrezgene_id)
foldChanges <- foldChanges %>% sort() %>% rev()
foldChanges <- foldChanges[!is.na(names(foldChanges))]
simplified_bp <- simplify(enrich_go_bp, cutoff = 0.7, by = "p.adjust", select_fun = min)
# Max size of gene set... useful for visualizing networks
simplified_bp_for_network <- simplified_bp %>% DOSE::gsfilter(by="GSSize", max=400)
# I believe there is a bug in the current version of DOSE or clusterProfiler that prevents this from working
# Throws this error on plotting: Error in graph.data.frame(x, directed = FALSE) : the data frame should contain at least two columns
simplified_bp_filt <- simplified_bp %>%
filter(p.adjust < .05, qvalue < 0.05) %>%
mutate(GeneRatio = DOSE::parse_ratio(GeneRatio)) %>%
arrange(desc(GeneRatio))
show3 <- simplified_bp_filt$Description[1:3] # Top 3
show3 <- show3[!is.na(show3)]
show5 <- simplified_bp_filt$Description[1:5] # Top 5
show5 <- show5[!is.na(show5)]
show10 <- simplified_bp_filt$Description[1:10] # Top 10
show10 <- show10[!is.na(show10)]
plot2 <- cnetplot(simplified_bp,
foldChange=foldChanges,
showCategory = show10,
cex_label_gene=0.3)
plot3 <- upsetplot(enrich_go_bp)
plot4 <- heatplot(simplified_bp, foldChange=foldChanges, showCategory = show10) +
scale_y_discrete(labels = function(x) stringr::str_trunc(x, width = 50))
print(plot1)
print(plot2)
print(plot3)
print(plot4)
} else { print("No significantly enriched terms using criteria selected") }
if (nrow(enrich_go_mf@result %>% filter(p.adjust < 0.01 & qvalue < 0.05)) > 0) {
plot1 <- clusterProfiler::dotplot(enrich_go_mf, font.size=9) +
theme(axis.text.y = element_text(angle = 0)) +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 30))
simplified_mf <- simplify(enrich_go_mf, cutoff = 0.7, by = "p.adjust", select_fun = min)
# Max size of gene set... useful for visualizing networks
simplified_mf_for_network <- simplified_mf %>% DOSE::gsfilter(by="GSSize", max=400)
# I believe there is a bug in the current version of DOSE or clusterProfiler that prevents this from working
# Throws this error on plotting: Error in graph.data.frame(x, directed = FALSE) : the data frame should contain at least two columns
simplified_mf_filt <- simplified_mf %>%
filter(p.adjust < .05, qvalue < 0.05) %>%
mutate(GeneRatio = DOSE::parse_ratio(GeneRatio)) %>%
arrange(desc(GeneRatio))
show3 <- simplified_mf_filt$Description[1:3] # Top 3
show5 <- simplified_mf_filt$Description[1:5] # Top 5
show10 <- simplified_mf_filt$Description[1:10] # Top 10
plot2 <- cnetplot(simplified_mf,
foldChange=foldChanges,
showCategory = show10,
cex_label_gene=0.3)
enrich_go_mf_upset <- enrich_go_mf
enrich_go_mf_upset@result$Description <- str_trunc(enrich_go_mf_upset@result$Description, width = 50)
plot3 <- upsetplot(enrich_go_mf_upset)
plot4 <- enrichplot::heatplot(simplified_mf, foldChange=foldChanges, showCategory = show10) +
scale_y_discrete(labels = function(x) stringr::str_trunc(x, width = 50))
print(plot1)
print(plot2)
print(plot3)
print(plot4)
} else { print("No significantly enriched terms using criteria selected") }
if (nrow(enrich_go_cc@result %>% filter(p.adjust < 0.01 & qvalue < 0.05)) > 0) {
plot1 <- clusterProfiler::dotplot(enrich_go_cc, font.size=9) +
theme(axis.text.y = element_text(angle = 0)) +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 30))
simplified_cc <- simplify(enrich_go_cc, cutoff = 0.7, by = "p.adjust", select_fun = min)
# Max size of gene set... useful for visualizing networks
simplified_cc_for_network <- simplified_cc %>% DOSE::gsfilter(by="GSSize", max=400)
# I believe there is a bug in the current version of DOSE or clusterProfiler that prevents this from working
# Throws this error on plotting: Error in graph.data.frame(x, directed = FALSE) : the data frame should contain at least two columns
simplified_cc_filt <- simplified_cc %>%
filter(p.adjust < .05, qvalue < 0.05) %>%
mutate(GeneRatio = DOSE::parse_ratio(GeneRatio)) %>%
arrange(desc(GeneRatio))
show3 <- simplified_cc_filt$Description[1:3] # Top 3
show5 <- simplified_cc_filt$Description[1:5] # Top 5
show10 <- simplified_cc_filt$Description[1:10] # Top 10
plot2 <- cnetplot(simplified_cc,
foldChange=foldChanges,
showCategory = show10,
cex_label_gene=0.3)
enrich_go_cc_upset <- enrich_go_cc
enrich_go_cc_upset@result$Description <- str_trunc(enrich_go_cc_upset@result$Description, width = 50)
plot3 <- upsetplot(enrich_go_cc_upset)
plot4 <- enrichplot::heatplot(simplified_cc, foldChange=foldChanges, showCategory = show10) +
scale_y_discrete(labels = function(x) stringr::str_trunc(x, width = 50))
print(plot1)
print(plot2)
print(plot3)
print(plot4)
} else { print("No significantly enriched terms using criteria selected") }
## [1] "No significantly enriched terms using criteria selected"
This section uses clusterProfiler to detect enriched gene sets using WikiPathways data.
entrez_IDs <- as.character(id_table_entrez$entrezgene_id)
entrezDEGs <- dplyr::left_join(significantResults,
id_table_entrez,
by=biomart_filter)
entrezDEGs <- as.vector(entrezDEGs$entrezgene_id)
Wiki <- rWikiPathways::downloadPathwayArchive(organism=species_sci, format = "gmt", destpath = paths$DEG_output)
wp2gene <- clusterProfiler::read.gmt(file.path(paths$DEG_output,Wiki))
wp2gene <- wp2gene %>% tidyr::separate(1, c("name","version","wpid","org"), "%")
wpid2gene <- wp2gene %>% dplyr::select(wpid,gene) #TERM2GENE
wpid2name <- wp2gene %>% dplyr::select(wpid,name) #TERM2NAME
wpid2gene
## wpid gene
## 1 WP124 108348093
## 2 WP124 100910144
## 3 WP124 396552
## 4 WP124 396551
## 5 WP124 100365112
## 6 WP124 65036
## 7 WP124 312382
## 8 WP124 24861
## 9 WP124 396527
## 10 WP124 25303
## 11 WP124 24346
## 12 WP124 154516
## 13 WP124 24565
## 14 WP124 501233
## 15 WP124 574523
## 16 WP124 29225
## 17 WP124 171118
## 18 WP124 501232
## 19 WP124 113992
## 20 WP124 100125372
## 21 WP124 301595
## 22 WP1276 289632
## 23 WP1276 396552
## 24 WP1276 396551
## 25 WP1276 289827
## 26 WP1276 286954
## 27 WP1276 24861
## 28 WP1276 83808
## 29 WP1276 396527
## 30 WP1276 154516
## 31 WP1276 286989
## 32 WP1276 24645
## 33 WP1276 363109
## 34 WP1276 574523
## 35 WP1276 113992
## 36 WP1276 83472
## 37 WP1276 305264
## 38 WP1276 679990
## 39 WP1276 301595
## 40 WP1277 64012
## 41 WP1277 64046
## 42 WP1277 363251
## 43 WP1277 360748
## 44 WP1277 25019
## 45 WP1277 363247
## 46 WP1277 309995
## 47 WP1278 29538
## 48 WP1278 313121
## 49 WP1278 246756
## 50 WP1278 81736
## 51 WP1278 299331
## 52 WP1278 311245
## 53 WP1278 286908
## 54 WP1278 309165
## 55 WP1278 24481
## 56 WP1278 360640
## 57 WP1278 309452
## 58 WP1278 309361
## 59 WP1278 116554
## 60 WP1278 81780
## 61 WP1278 84351
## 62 WP1278 103694380
## 63 WP1278 116590
## 64 WP1279 288588
## 65 WP1279 266713
## 66 WP1279 497672
## 67 WP1279 297893
## 68 WP1279 83828
## 69 WP1279 81649
## 70 WP1279 314322
## 71 WP1279 690966
## 72 WP1279 314436
## 73 WP1279 24890
## 74 WP1279 170922
## 75 WP1279 682902
## 76 WP1279 363287
## 77 WP1279 58919
## 78 WP1279 289561
## 79 WP1279 292778
## 80 WP1279 293621
## 81 WP1279 294236
## 82 WP1279 303918
## 83 WP1279 24224
## 84 WP1279 117526
## 85 WP1279 25636
## 86 WP1279 54244
## 87 WP1279 83503
## 88 WP1279 307485
## 89 WP1279 81646
## 90 WP1279 24185
## 91 WP1279 117017
## 92 WP1279 308415
## 93 WP1279 84351
## 94 WP1279 498109
## 95 WP1279 367858
## 96 WP1279 24516
## 97 WP1279 306516
## 98 WP1279 81504
## 99 WP1279 83805
## 100 WP1279 361365
## 101 WP1279 314612
## 102 WP1279 81673
## 103 WP1279 313845
## 104 WP1279 84389
## 105 WP1279 24790
## 106 WP1279 81674
## 107 WP1279 103690054
## 108 WP1279 363067
## 109 WP1279 680149
## 110 WP1279 171150
## 111 WP1279 84582
## 112 WP1279 50658
## 113 WP1279 116590
## 114 WP1279 84581
## 115 WP1279 288651
## 116 WP1279 100363500
## 117 WP1279 288533
## 118 WP1279 373541
## 119 WP1279 363481
## 120 WP1279 294018
## 121 WP1279 361580
## 122 WP1279 24400
## 123 WP1279 103695118
## 124 WP1279 81736
## 125 WP1279 294693
## 126 WP1279 310784
## 127 WP1279 170851
## 128 WP1279 171104
## 129 WP1279 84577
## 130 WP1279 309361
## 131 WP1279 291703
## 132 WP1279 363633
## 133 WP1280 24422
## 134 WP1280 24314
## 135 WP1280 29739
## 136 WP1280 83619
## 137 WP1280 117519
## 138 WP1280 313633
## 139 WP1280 494499
## 140 WP1280 25283
## 141 WP1280 24680
## 142 WP1280 24253
## 143 WP1280 116554
## 144 WP1280 24451
## 145 WP1280 54267
## 146 WP1280 288480
## 147 WP1281 294504
## 148 WP1281 24318
## 149 WP1281 305509
## 150 WP1281 24410
## 151 WP1281 24412
## 152 WP1281 24411
## 153 WP1281 24414
## 154 WP1281 29629
## 155 WP1281 29628
## 156 WP1281 24316
## 157 WP1281 29627
## 158 WP1281 29241
## 159 WP1281 362317
## 160 WP1281 361842
## 161 WP1281 116590
## 162 WP1281 100363500
## 163 WP1281 24408
## 164 WP1281 25432
## 165 WP1281 24245
## 166 WP1281 29437
## 167 WP1281 25636
## 168 WP1281 25050
## 169 WP1281 314988
## 170 WP1281 170851
## 171 WP1281 81646
## 172 WP1281 58960
## 173 WP1281 307861
## 174 WP1281 50689
## 175 WP1282 171347
## 176 WP1282 81676
## 177 WP1282 690050
## 178 WP1282 368066
## 179 WP1282 24661
## 180 WP1282 25331
## 181 WP1282 300691
## 182 WP1282 24267
## 183 WP1283 300677
## 184 WP1283 679739
## 185 WP1283 293655
## 186 WP1283 81728
## 187 WP1283 293652
## 188 WP1283 293453
## 189 WP1283 293130
## 190 WP1283 102553861
## 191 WP1283 499529
## 192 WP1283 26194
## 193 WP1283 26197
## 194 WP1283 689938
## 195 WP1283 26199
## 196 WP1283 361385
## 197 WP1283 299954
## 198 WP1283 114630
## 199 WP1283 362837
## 200 WP1283 690441
## 201 WP1283 171375
## 202 WP1283 681418
## 203 WP1283 171374
## 204 WP1283 26193
## 205 WP1283 689271
## 206 WP1283 301458
## 207 WP1283 140608
## 208 WP1283 295923
## 209 WP1283 296658
## 210 WP1283 26203
## 211 WP1283 26202
## 212 WP1283 100361468
## 213 WP1283 94271
## 214 WP1283 26204
## 215 WP1283 171528
## 216 WP1283 316632
## 217 WP1283 681024
## 218 WP1283 299643
## 219 WP1283 29754
## 220 WP1283 29478
## 221 WP1283 362440
## 222 WP1283 26201
## 223 WP1283 26200
## 224 WP1283 192241
## 225 WP1283 245958
## 226 WP1283 171082
## 227 WP1283 289218
## 228 WP1283 294964
## 229 WP1283 293991
## 230 WP1283 25488
## 231 WP1283 64539
## 232 WP1283 297990
## 233 WP1283 83615
## 234 WP1283 65262
## 235 WP1283 100363268
## 236 WP1283 100912599
## 237 WP1283 302526
## 238 WP1283 315167
## 239 WP1283 291660
## 240 WP1283 245965
## 241 WP1283 362588
## 242 WP1283 362344
## 243 WP1283 362749
## 244 WP1283 301123
## 245 WP1284 298947
## 246 WP1284 24918
## 247 WP1284 25125
## 248 WP1284 25124
## 249 WP1284 24335
## 250 WP1284 25126
## 251 WP1284 25467
## 252 WP1284 81504
## 253 WP1284 24336
## 254 WP1284 24699
## 255 WP1284 24514
## 256 WP1284 83805
## 257 WP1284 116551
## 258 WP1284 313845
## 259 WP1284 116590
## 260 WP1284 24703
## 261 WP1284 367264
## 262 WP1284 25676
## 263 WP1284 252971
## 264 WP1284 29376
## 265 WP1284 170851
## 266 WP1284 116680
## 267 WP1284 24185
## 268 WP1284 58960
## 269 WP1284 83681
## 270 WP1284 85385
## 271 WP1284 50689
## 272 WP1285 29277
## 273 WP1285 171335
## 274 WP1285 65030
## 275 WP1285 252934
## 276 WP1286 290623
## 277 WP1286 286921
## 278 WP1286 293451
## 279 WP1286 58953
## 280 WP1286 500257
## 281 WP1286 362035
## 282 WP1286 500892
## 283 WP1286 499689
## 284 WP1286 304322
## 285 WP1286 302302
## 286 WP1286 307838
## 287 WP1286 24421
## 288 WP1286 297029
## 289 WP1286 25355
## 290 WP1286 500359
## 291 WP1286 24267
## 292 WP1286 154516
## 293 WP1286 24423
## 294 WP1286 296972
## 295 WP1286 24422
## 296 WP1286 316435
## 297 WP1286 24424
## 298 WP1286 24426
## 299 WP1286 360268
## 300 WP1286 361510
## 301 WP1286 361631
## 302 WP1286 309465
## 303 WP1286 311257
## 304 WP1286 299566
## 305 WP1286 54246
## 306 WP1286 246245
## 307 WP1286 310848
## 308 WP1286 305264
## 309 WP1286 78959
## 310 WP1286 192242
## 311 WP1286 24912
## 312 WP1286 396552
## 313 WP1286 294449
## 314 WP1286 396551
## 315 WP1286 300850
## 316 WP1286 295934
## 317 WP1286 307092
## 318 WP1286 81869
## 319 WP1286 114846
## 320 WP1286 24192
## 321 WP1286 494499
## 322 WP1286 116632
## 323 WP1286 116631
## 324 WP1286 84468
## 325 WP1286 499302
## 326 WP1286 499422
## 327 WP1286 681913
## 328 WP1286 103690051
## 329 WP1286 691394
## 330 WP1286 303218
## 331 WP1286 108348061
## 332 WP1286 302489
## 333 WP1286 286954
## 334 WP1286 308190
## 335 WP1286 58981
## 336 WP1286 292155
## 337 WP1286 25458
## 338 WP1286 24404
## 339 WP1286 113992
## 340 WP1286 362228
## 341 WP1286 65030
## 342 WP1286 308511
## 343 WP1286 303669
## 344 WP1286 293779
## 345 WP1286 368066
## 346 WP1286 690050
## 347 WP1286 25429
## 348 WP1286 100910462
## 349 WP1286 64352
## 350 WP1286 288108
## 351 WP1286 498166
## 352 WP1286 25147
## 353 WP1286 25146
## 354 WP1286 114700
## 355 WP1286 25426
## 356 WP1286 295430
## 357 WP1286 25428
## 358 WP1286 81924
## 359 WP1286 25427
## 360 WP1286 24294
## 361 WP1286 84406
## 362 WP1286 24296
## 363 WP1286 24297
## 364 WP1286 24298
## 365 WP1286 308445
## 366 WP1286 24902
## 367 WP1286 361718
## 368 WP1286 24861
## 369 WP1286 25279
## 370 WP1286 314694
## 371 WP1286 353498
## 372 WP1286 25315
## 373 WP1286 171445
## 374 WP1286 26760
## 375 WP1286 574523
## 376 WP1286 29633
## 377 WP1286 363618
## 378 WP1286 116686
## 379 WP1286 300691
## 380 WP1286 83783
## 381 WP1286 140568
## 382 WP1286 684979
## 383 WP1286 301264
## 384 WP1286 154985
## 385 WP1286 29725
## 386 WP1286 246247
## 387 WP1286 29328
## 388 WP1286 246248
## 389 WP1286 286989
## 390 WP1286 291770
## 391 WP1286 29326
## 392 WP1286 246767
## 393 WP1286 316325
## 394 WP1286 81676
## 395 WP1286 25086
## 396 WP1286 310855
## 397 WP1286 100910526
## 398 WP1286 171072
## 399 WP1286 29680
## 400 WP1286 65185
## 401 WP1286 301595
## 402 WP1286 301517
## 403 WP1286 307859
## 404 WP1286 25256
## 405 WP1286 396527
## 406 WP1286 64305
## 407 WP1286 312495
## 408 WP1286 29738
## 409 WP1286 685402
## 410 WP1286 362782
## 411 WP1286 108348148
## 412 WP1286 498489
## 413 WP1286 171341
## 414 WP1286 289197
## 415 WP1286 84493
## 416 WP1286 292915
## 417 WP1287 293779
## 418 WP1287 311569
## 419 WP1288 64033
## 420 WP1288 315294
## 421 WP1288 300438
## 422 WP1288 161452
## 423 WP1288 315298
## 424 WP1288 317674
## 425 WP1288 64512
## 426 WP1288 24577
## 427 WP1288 64558
## 428 WP1288 266715
## 429 WP1288 312781
## 430 WP1288 29508
## 431 WP1288 315648
## 432 WP1288 84006
## 433 WP1288 117104
## 434 WP1288 25023
## 435 WP1288 363122
## 436 WP1288 58919
## 437 WP1288 29382
## 438 WP1288 64566
## 439 WP1288 81717
## 440 WP1288 313121
## 441 WP1288 54244
## 442 WP1288 25193
## 443 WP1288 25272
## 444 WP1288 500047
## 445 WP1288 84353
## 446 WP1288 282581
## 447 WP1288 282582
## 448 WP1288 414065
## 449 WP1288 24516
## 450 WP1288 499593
## 451 WP1288 315196
## 452 WP1288 294562
## 453 WP1288 24673
## 454 WP1288 25445
## 455 WP1288 25522
## 456 WP1288 25406
## 457 WP1288 81749
## 458 WP1288 84426
## 459 WP1288 308068
## 460 WP1288 691318
## 461 WP1288 25682
## 462 WP1288 84027
## 463 WP1288 24672
## 464 WP1288 114850
## 465 WP1288 83572
## 466 WP1288 303251
## 467 WP1288 497961
## 468 WP1288 364754
## 469 WP1288 364952
## 470 WP1288 117281
## 471 WP1288 50658
## 472 WP1288 303811
## 473 WP1288 293649
## 474 WP1288 25619
## 475 WP1288 266608
## 476 WP1288 501506
## 477 WP1288 24882
## 478 WP1288 25335
## 479 WP1288 311163
## 480 WP1288 170538
## 481 WP1288 295341
## 482 WP1288 24842
## 483 WP1288 24205
## 484 WP1288 312451
## 485 WP1288 311881
## 486 WP1288 103694903
## 487 WP1288 58822
## 488 WP1288 29134
## 489 WP1288 316527
## 490 WP1288 58868
## 491 WP1288 299147
## 492 WP1288 316526
## 493 WP1288 362102
## 494 WP1288 140584
## 495 WP1288 24680
## 496 WP1288 24881
## 497 WP1288 83721
## 498 WP1288 116466
## 499 WP1288 303181
## 500 WP1288 114487
## [ reached 'max' / getOption("max.print") -- omitted 6052 rows ]
wpid2name
## wpid name
## 1 WP124 Irinotecan Pathway
## 2 WP124 Irinotecan Pathway
## 3 WP124 Irinotecan Pathway
## 4 WP124 Irinotecan Pathway
## 5 WP124 Irinotecan Pathway
## 6 WP124 Irinotecan Pathway
## 7 WP124 Irinotecan Pathway
## 8 WP124 Irinotecan Pathway
## 9 WP124 Irinotecan Pathway
## 10 WP124 Irinotecan Pathway
## 11 WP124 Irinotecan Pathway
## 12 WP124 Irinotecan Pathway
## 13 WP124 Irinotecan Pathway
## 14 WP124 Irinotecan Pathway
## 15 WP124 Irinotecan Pathway
## 16 WP124 Irinotecan Pathway
## 17 WP124 Irinotecan Pathway
## 18 WP124 Irinotecan Pathway
## 19 WP124 Irinotecan Pathway
## 20 WP124 Irinotecan Pathway
## 21 WP124 Irinotecan Pathway
## 22 WP1276 Glucuronidation
## 23 WP1276 Glucuronidation
## 24 WP1276 Glucuronidation
## 25 WP1276 Glucuronidation
## 26 WP1276 Glucuronidation
## 27 WP1276 Glucuronidation
## 28 WP1276 Glucuronidation
## 29 WP1276 Glucuronidation
## 30 WP1276 Glucuronidation
## 31 WP1276 Glucuronidation
## 32 WP1276 Glucuronidation
## 33 WP1276 Glucuronidation
## 34 WP1276 Glucuronidation
## 35 WP1276 Glucuronidation
## 36 WP1276 Glucuronidation
## 37 WP1276 Glucuronidation
## 38 WP1276 Glucuronidation
## 39 WP1276 Glucuronidation
## 40 WP1277 Non-homologous end joining
## 41 WP1277 Non-homologous end joining
## 42 WP1277 Non-homologous end joining
## 43 WP1277 Non-homologous end joining
## 44 WP1277 Non-homologous end joining
## 45 WP1277 Non-homologous end joining
## 46 WP1277 Non-homologous end joining
## 47 WP1278 EBV LMP1 signaling
## 48 WP1278 EBV LMP1 signaling
## 49 WP1278 EBV LMP1 signaling
## 50 WP1278 EBV LMP1 signaling
## 51 WP1278 EBV LMP1 signaling
## 52 WP1278 EBV LMP1 signaling
## 53 WP1278 EBV LMP1 signaling
## 54 WP1278 EBV LMP1 signaling
## 55 WP1278 EBV LMP1 signaling
## 56 WP1278 EBV LMP1 signaling
## 57 WP1278 EBV LMP1 signaling
## 58 WP1278 EBV LMP1 signaling
## 59 WP1278 EBV LMP1 signaling
## 60 WP1278 EBV LMP1 signaling
## 61 WP1278 EBV LMP1 signaling
## 62 WP1278 EBV LMP1 signaling
## 63 WP1278 EBV LMP1 signaling
## 64 WP1279 Estrogen signaling
## 65 WP1279 Estrogen signaling
## 66 WP1279 Estrogen signaling
## 67 WP1279 Estrogen signaling
## 68 WP1279 Estrogen signaling
## 69 WP1279 Estrogen signaling
## 70 WP1279 Estrogen signaling
## 71 WP1279 Estrogen signaling
## 72 WP1279 Estrogen signaling
## 73 WP1279 Estrogen signaling
## 74 WP1279 Estrogen signaling
## 75 WP1279 Estrogen signaling
## 76 WP1279 Estrogen signaling
## 77 WP1279 Estrogen signaling
## 78 WP1279 Estrogen signaling
## 79 WP1279 Estrogen signaling
## 80 WP1279 Estrogen signaling
## 81 WP1279 Estrogen signaling
## 82 WP1279 Estrogen signaling
## 83 WP1279 Estrogen signaling
## 84 WP1279 Estrogen signaling
## 85 WP1279 Estrogen signaling
## 86 WP1279 Estrogen signaling
## 87 WP1279 Estrogen signaling
## 88 WP1279 Estrogen signaling
## 89 WP1279 Estrogen signaling
## 90 WP1279 Estrogen signaling
## 91 WP1279 Estrogen signaling
## 92 WP1279 Estrogen signaling
## 93 WP1279 Estrogen signaling
## 94 WP1279 Estrogen signaling
## 95 WP1279 Estrogen signaling
## 96 WP1279 Estrogen signaling
## 97 WP1279 Estrogen signaling
## 98 WP1279 Estrogen signaling
## 99 WP1279 Estrogen signaling
## 100 WP1279 Estrogen signaling
## 101 WP1279 Estrogen signaling
## 102 WP1279 Estrogen signaling
## 103 WP1279 Estrogen signaling
## 104 WP1279 Estrogen signaling
## 105 WP1279 Estrogen signaling
## 106 WP1279 Estrogen signaling
## 107 WP1279 Estrogen signaling
## 108 WP1279 Estrogen signaling
## 109 WP1279 Estrogen signaling
## 110 WP1279 Estrogen signaling
## 111 WP1279 Estrogen signaling
## 112 WP1279 Estrogen signaling
## 113 WP1279 Estrogen signaling
## 114 WP1279 Estrogen signaling
## 115 WP1279 Estrogen signaling
## 116 WP1279 Estrogen signaling
## 117 WP1279 Estrogen signaling
## 118 WP1279 Estrogen signaling
## 119 WP1279 Estrogen signaling
## 120 WP1279 Estrogen signaling
## 121 WP1279 Estrogen signaling
## 122 WP1279 Estrogen signaling
## 123 WP1279 Estrogen signaling
## 124 WP1279 Estrogen signaling
## 125 WP1279 Estrogen signaling
## 126 WP1279 Estrogen signaling
## 127 WP1279 Estrogen signaling
## 128 WP1279 Estrogen signaling
## 129 WP1279 Estrogen signaling
## 130 WP1279 Estrogen signaling
## 131 WP1279 Estrogen signaling
## 132 WP1279 Estrogen signaling
## 133 WP1280 Keap1-Nrf2
## 134 WP1280 Keap1-Nrf2
## 135 WP1280 Keap1-Nrf2
## 136 WP1280 Keap1-Nrf2
## 137 WP1280 Keap1-Nrf2
## 138 WP1280 Keap1-Nrf2
## 139 WP1280 Keap1-Nrf2
## 140 WP1280 Keap1-Nrf2
## 141 WP1280 Keap1-Nrf2
## 142 WP1280 Keap1-Nrf2
## 143 WP1280 Keap1-Nrf2
## 144 WP1280 Keap1-Nrf2
## 145 WP1280 Keap1-Nrf2
## 146 WP1280 Keap1-Nrf2
## 147 WP1281 Hypothetical Network for Drug Addiction
## 148 WP1281 Hypothetical Network for Drug Addiction
## 149 WP1281 Hypothetical Network for Drug Addiction
## 150 WP1281 Hypothetical Network for Drug Addiction
## 151 WP1281 Hypothetical Network for Drug Addiction
## 152 WP1281 Hypothetical Network for Drug Addiction
## 153 WP1281 Hypothetical Network for Drug Addiction
## 154 WP1281 Hypothetical Network for Drug Addiction
## 155 WP1281 Hypothetical Network for Drug Addiction
## 156 WP1281 Hypothetical Network for Drug Addiction
## 157 WP1281 Hypothetical Network for Drug Addiction
## 158 WP1281 Hypothetical Network for Drug Addiction
## 159 WP1281 Hypothetical Network for Drug Addiction
## 160 WP1281 Hypothetical Network for Drug Addiction
## 161 WP1281 Hypothetical Network for Drug Addiction
## 162 WP1281 Hypothetical Network for Drug Addiction
## 163 WP1281 Hypothetical Network for Drug Addiction
## 164 WP1281 Hypothetical Network for Drug Addiction
## 165 WP1281 Hypothetical Network for Drug Addiction
## 166 WP1281 Hypothetical Network for Drug Addiction
## 167 WP1281 Hypothetical Network for Drug Addiction
## 168 WP1281 Hypothetical Network for Drug Addiction
## 169 WP1281 Hypothetical Network for Drug Addiction
## 170 WP1281 Hypothetical Network for Drug Addiction
## 171 WP1281 Hypothetical Network for Drug Addiction
## 172 WP1281 Hypothetical Network for Drug Addiction
## 173 WP1281 Hypothetical Network for Drug Addiction
## 174 WP1281 Hypothetical Network for Drug Addiction
## 175 WP1282 Methylation
## 176 WP1282 Methylation
## 177 WP1282 Methylation
## 178 WP1282 Methylation
## 179 WP1282 Methylation
## 180 WP1282 Methylation
## 181 WP1282 Methylation
## 182 WP1282 Methylation
## 183 WP1283 Oxidative phosphorylation
## 184 WP1283 Oxidative phosphorylation
## 185 WP1283 Oxidative phosphorylation
## 186 WP1283 Oxidative phosphorylation
## 187 WP1283 Oxidative phosphorylation
## 188 WP1283 Oxidative phosphorylation
## 189 WP1283 Oxidative phosphorylation
## 190 WP1283 Oxidative phosphorylation
## 191 WP1283 Oxidative phosphorylation
## 192 WP1283 Oxidative phosphorylation
## 193 WP1283 Oxidative phosphorylation
## 194 WP1283 Oxidative phosphorylation
## 195 WP1283 Oxidative phosphorylation
## 196 WP1283 Oxidative phosphorylation
## 197 WP1283 Oxidative phosphorylation
## 198 WP1283 Oxidative phosphorylation
## 199 WP1283 Oxidative phosphorylation
## 200 WP1283 Oxidative phosphorylation
## 201 WP1283 Oxidative phosphorylation
## 202 WP1283 Oxidative phosphorylation
## 203 WP1283 Oxidative phosphorylation
## 204 WP1283 Oxidative phosphorylation
## 205 WP1283 Oxidative phosphorylation
## 206 WP1283 Oxidative phosphorylation
## 207 WP1283 Oxidative phosphorylation
## 208 WP1283 Oxidative phosphorylation
## 209 WP1283 Oxidative phosphorylation
## 210 WP1283 Oxidative phosphorylation
## 211 WP1283 Oxidative phosphorylation
## 212 WP1283 Oxidative phosphorylation
## 213 WP1283 Oxidative phosphorylation
## 214 WP1283 Oxidative phosphorylation
## 215 WP1283 Oxidative phosphorylation
## 216 WP1283 Oxidative phosphorylation
## 217 WP1283 Oxidative phosphorylation
## 218 WP1283 Oxidative phosphorylation
## 219 WP1283 Oxidative phosphorylation
## 220 WP1283 Oxidative phosphorylation
## 221 WP1283 Oxidative phosphorylation
## 222 WP1283 Oxidative phosphorylation
## 223 WP1283 Oxidative phosphorylation
## 224 WP1283 Oxidative phosphorylation
## 225 WP1283 Oxidative phosphorylation
## 226 WP1283 Oxidative phosphorylation
## 227 WP1283 Oxidative phosphorylation
## 228 WP1283 Oxidative phosphorylation
## 229 WP1283 Oxidative phosphorylation
## 230 WP1283 Oxidative phosphorylation
## 231 WP1283 Oxidative phosphorylation
## 232 WP1283 Oxidative phosphorylation
## 233 WP1283 Oxidative phosphorylation
## 234 WP1283 Oxidative phosphorylation
## 235 WP1283 Oxidative phosphorylation
## 236 WP1283 Oxidative phosphorylation
## 237 WP1283 Oxidative phosphorylation
## 238 WP1283 Oxidative phosphorylation
## 239 WP1283 Oxidative phosphorylation
## 240 WP1283 Oxidative phosphorylation
## 241 WP1283 Oxidative phosphorylation
## 242 WP1283 Oxidative phosphorylation
## 243 WP1283 Oxidative phosphorylation
## 244 WP1283 Oxidative phosphorylation
## 245 WP1284 EPO Receptor Signaling
## 246 WP1284 EPO Receptor Signaling
## 247 WP1284 EPO Receptor Signaling
## 248 WP1284 EPO Receptor Signaling
## 249 WP1284 EPO Receptor Signaling
## 250 WP1284 EPO Receptor Signaling
## 251 WP1284 EPO Receptor Signaling
## 252 WP1284 EPO Receptor Signaling
## 253 WP1284 EPO Receptor Signaling
## 254 WP1284 EPO Receptor Signaling
## 255 WP1284 EPO Receptor Signaling
## 256 WP1284 EPO Receptor Signaling
## 257 WP1284 EPO Receptor Signaling
## 258 WP1284 EPO Receptor Signaling
## 259 WP1284 EPO Receptor Signaling
## 260 WP1284 EPO Receptor Signaling
## 261 WP1284 EPO Receptor Signaling
## 262 WP1284 EPO Receptor Signaling
## 263 WP1284 EPO Receptor Signaling
## 264 WP1284 EPO Receptor Signaling
## 265 WP1284 EPO Receptor Signaling
## 266 WP1284 EPO Receptor Signaling
## 267 WP1284 EPO Receptor Signaling
## 268 WP1284 EPO Receptor Signaling
## 269 WP1284 EPO Receptor Signaling
## 270 WP1284 EPO Receptor Signaling
## 271 WP1284 EPO Receptor Signaling
## 272 WP1285 Arachidonate Epoxygenase Epoxide Hydrolase
## 273 WP1285 Arachidonate Epoxygenase Epoxide Hydrolase
## 274 WP1285 Arachidonate Epoxygenase Epoxide Hydrolase
## 275 WP1285 Arachidonate Epoxygenase Epoxide Hydrolase
## 276 WP1286 Metapathway biotransformation
## 277 WP1286 Metapathway biotransformation
## 278 WP1286 Metapathway biotransformation
## 279 WP1286 Metapathway biotransformation
## 280 WP1286 Metapathway biotransformation
## 281 WP1286 Metapathway biotransformation
## 282 WP1286 Metapathway biotransformation
## 283 WP1286 Metapathway biotransformation
## 284 WP1286 Metapathway biotransformation
## 285 WP1286 Metapathway biotransformation
## 286 WP1286 Metapathway biotransformation
## 287 WP1286 Metapathway biotransformation
## 288 WP1286 Metapathway biotransformation
## 289 WP1286 Metapathway biotransformation
## 290 WP1286 Metapathway biotransformation
## 291 WP1286 Metapathway biotransformation
## 292 WP1286 Metapathway biotransformation
## 293 WP1286 Metapathway biotransformation
## 294 WP1286 Metapathway biotransformation
## 295 WP1286 Metapathway biotransformation
## 296 WP1286 Metapathway biotransformation
## 297 WP1286 Metapathway biotransformation
## 298 WP1286 Metapathway biotransformation
## 299 WP1286 Metapathway biotransformation
## 300 WP1286 Metapathway biotransformation
## 301 WP1286 Metapathway biotransformation
## 302 WP1286 Metapathway biotransformation
## 303 WP1286 Metapathway biotransformation
## 304 WP1286 Metapathway biotransformation
## 305 WP1286 Metapathway biotransformation
## 306 WP1286 Metapathway biotransformation
## 307 WP1286 Metapathway biotransformation
## 308 WP1286 Metapathway biotransformation
## 309 WP1286 Metapathway biotransformation
## 310 WP1286 Metapathway biotransformation
## 311 WP1286 Metapathway biotransformation
## 312 WP1286 Metapathway biotransformation
## 313 WP1286 Metapathway biotransformation
## 314 WP1286 Metapathway biotransformation
## 315 WP1286 Metapathway biotransformation
## 316 WP1286 Metapathway biotransformation
## 317 WP1286 Metapathway biotransformation
## 318 WP1286 Metapathway biotransformation
## 319 WP1286 Metapathway biotransformation
## 320 WP1286 Metapathway biotransformation
## 321 WP1286 Metapathway biotransformation
## 322 WP1286 Metapathway biotransformation
## 323 WP1286 Metapathway biotransformation
## 324 WP1286 Metapathway biotransformation
## 325 WP1286 Metapathway biotransformation
## 326 WP1286 Metapathway biotransformation
## 327 WP1286 Metapathway biotransformation
## 328 WP1286 Metapathway biotransformation
## 329 WP1286 Metapathway biotransformation
## 330 WP1286 Metapathway biotransformation
## 331 WP1286 Metapathway biotransformation
## 332 WP1286 Metapathway biotransformation
## 333 WP1286 Metapathway biotransformation
## 334 WP1286 Metapathway biotransformation
## 335 WP1286 Metapathway biotransformation
## 336 WP1286 Metapathway biotransformation
## 337 WP1286 Metapathway biotransformation
## 338 WP1286 Metapathway biotransformation
## 339 WP1286 Metapathway biotransformation
## 340 WP1286 Metapathway biotransformation
## 341 WP1286 Metapathway biotransformation
## 342 WP1286 Metapathway biotransformation
## 343 WP1286 Metapathway biotransformation
## 344 WP1286 Metapathway biotransformation
## 345 WP1286 Metapathway biotransformation
## 346 WP1286 Metapathway biotransformation
## 347 WP1286 Metapathway biotransformation
## 348 WP1286 Metapathway biotransformation
## 349 WP1286 Metapathway biotransformation
## 350 WP1286 Metapathway biotransformation
## 351 WP1286 Metapathway biotransformation
## 352 WP1286 Metapathway biotransformation
## 353 WP1286 Metapathway biotransformation
## 354 WP1286 Metapathway biotransformation
## 355 WP1286 Metapathway biotransformation
## 356 WP1286 Metapathway biotransformation
## 357 WP1286 Metapathway biotransformation
## 358 WP1286 Metapathway biotransformation
## 359 WP1286 Metapathway biotransformation
## 360 WP1286 Metapathway biotransformation
## 361 WP1286 Metapathway biotransformation
## 362 WP1286 Metapathway biotransformation
## 363 WP1286 Metapathway biotransformation
## 364 WP1286 Metapathway biotransformation
## 365 WP1286 Metapathway biotransformation
## 366 WP1286 Metapathway biotransformation
## 367 WP1286 Metapathway biotransformation
## 368 WP1286 Metapathway biotransformation
## 369 WP1286 Metapathway biotransformation
## 370 WP1286 Metapathway biotransformation
## 371 WP1286 Metapathway biotransformation
## 372 WP1286 Metapathway biotransformation
## 373 WP1286 Metapathway biotransformation
## 374 WP1286 Metapathway biotransformation
## 375 WP1286 Metapathway biotransformation
## 376 WP1286 Metapathway biotransformation
## 377 WP1286 Metapathway biotransformation
## 378 WP1286 Metapathway biotransformation
## 379 WP1286 Metapathway biotransformation
## 380 WP1286 Metapathway biotransformation
## 381 WP1286 Metapathway biotransformation
## 382 WP1286 Metapathway biotransformation
## 383 WP1286 Metapathway biotransformation
## 384 WP1286 Metapathway biotransformation
## 385 WP1286 Metapathway biotransformation
## 386 WP1286 Metapathway biotransformation
## 387 WP1286 Metapathway biotransformation
## 388 WP1286 Metapathway biotransformation
## 389 WP1286 Metapathway biotransformation
## 390 WP1286 Metapathway biotransformation
## 391 WP1286 Metapathway biotransformation
## 392 WP1286 Metapathway biotransformation
## 393 WP1286 Metapathway biotransformation
## 394 WP1286 Metapathway biotransformation
## 395 WP1286 Metapathway biotransformation
## 396 WP1286 Metapathway biotransformation
## 397 WP1286 Metapathway biotransformation
## 398 WP1286 Metapathway biotransformation
## 399 WP1286 Metapathway biotransformation
## 400 WP1286 Metapathway biotransformation
## 401 WP1286 Metapathway biotransformation
## 402 WP1286 Metapathway biotransformation
## 403 WP1286 Metapathway biotransformation
## 404 WP1286 Metapathway biotransformation
## 405 WP1286 Metapathway biotransformation
## 406 WP1286 Metapathway biotransformation
## 407 WP1286 Metapathway biotransformation
## 408 WP1286 Metapathway biotransformation
## 409 WP1286 Metapathway biotransformation
## 410 WP1286 Metapathway biotransformation
## 411 WP1286 Metapathway biotransformation
## 412 WP1286 Metapathway biotransformation
## 413 WP1286 Metapathway biotransformation
## 414 WP1286 Metapathway biotransformation
## 415 WP1286 Metapathway biotransformation
## 416 WP1286 Metapathway biotransformation
## 417 WP1287 Amino acid conjugation of benzoic acid
## 418 WP1287 Amino acid conjugation of benzoic acid
## 419 WP1288 Wnt Signaling Pathway and Pluripotency
## 420 WP1288 Wnt Signaling Pathway and Pluripotency
## 421 WP1288 Wnt Signaling Pathway and Pluripotency
## 422 WP1288 Wnt Signaling Pathway and Pluripotency
## 423 WP1288 Wnt Signaling Pathway and Pluripotency
## 424 WP1288 Wnt Signaling Pathway and Pluripotency
## 425 WP1288 Wnt Signaling Pathway and Pluripotency
## 426 WP1288 Wnt Signaling Pathway and Pluripotency
## 427 WP1288 Wnt Signaling Pathway and Pluripotency
## 428 WP1288 Wnt Signaling Pathway and Pluripotency
## 429 WP1288 Wnt Signaling Pathway and Pluripotency
## 430 WP1288 Wnt Signaling Pathway and Pluripotency
## 431 WP1288 Wnt Signaling Pathway and Pluripotency
## 432 WP1288 Wnt Signaling Pathway and Pluripotency
## 433 WP1288 Wnt Signaling Pathway and Pluripotency
## 434 WP1288 Wnt Signaling Pathway and Pluripotency
## 435 WP1288 Wnt Signaling Pathway and Pluripotency
## 436 WP1288 Wnt Signaling Pathway and Pluripotency
## 437 WP1288 Wnt Signaling Pathway and Pluripotency
## 438 WP1288 Wnt Signaling Pathway and Pluripotency
## 439 WP1288 Wnt Signaling Pathway and Pluripotency
## 440 WP1288 Wnt Signaling Pathway and Pluripotency
## 441 WP1288 Wnt Signaling Pathway and Pluripotency
## 442 WP1288 Wnt Signaling Pathway and Pluripotency
## 443 WP1288 Wnt Signaling Pathway and Pluripotency
## 444 WP1288 Wnt Signaling Pathway and Pluripotency
## 445 WP1288 Wnt Signaling Pathway and Pluripotency
## 446 WP1288 Wnt Signaling Pathway and Pluripotency
## 447 WP1288 Wnt Signaling Pathway and Pluripotency
## 448 WP1288 Wnt Signaling Pathway and Pluripotency
## 449 WP1288 Wnt Signaling Pathway and Pluripotency
## 450 WP1288 Wnt Signaling Pathway and Pluripotency
## 451 WP1288 Wnt Signaling Pathway and Pluripotency
## 452 WP1288 Wnt Signaling Pathway and Pluripotency
## 453 WP1288 Wnt Signaling Pathway and Pluripotency
## 454 WP1288 Wnt Signaling Pathway and Pluripotency
## 455 WP1288 Wnt Signaling Pathway and Pluripotency
## 456 WP1288 Wnt Signaling Pathway and Pluripotency
## 457 WP1288 Wnt Signaling Pathway and Pluripotency
## 458 WP1288 Wnt Signaling Pathway and Pluripotency
## 459 WP1288 Wnt Signaling Pathway and Pluripotency
## 460 WP1288 Wnt Signaling Pathway and Pluripotency
## 461 WP1288 Wnt Signaling Pathway and Pluripotency
## 462 WP1288 Wnt Signaling Pathway and Pluripotency
## 463 WP1288 Wnt Signaling Pathway and Pluripotency
## 464 WP1288 Wnt Signaling Pathway and Pluripotency
## 465 WP1288 Wnt Signaling Pathway and Pluripotency
## 466 WP1288 Wnt Signaling Pathway and Pluripotency
## 467 WP1288 Wnt Signaling Pathway and Pluripotency
## 468 WP1288 Wnt Signaling Pathway and Pluripotency
## 469 WP1288 Wnt Signaling Pathway and Pluripotency
## 470 WP1288 Wnt Signaling Pathway and Pluripotency
## 471 WP1288 Wnt Signaling Pathway and Pluripotency
## 472 WP1288 Wnt Signaling Pathway and Pluripotency
## 473 WP1288 Wnt Signaling Pathway and Pluripotency
## 474 WP1288 Wnt Signaling Pathway and Pluripotency
## 475 WP1288 Wnt Signaling Pathway and Pluripotency
## 476 WP1288 Wnt Signaling Pathway and Pluripotency
## 477 WP1288 Wnt Signaling Pathway and Pluripotency
## 478 WP1288 Wnt Signaling Pathway and Pluripotency
## 479 WP1288 Wnt Signaling Pathway and Pluripotency
## 480 WP1288 Wnt Signaling Pathway and Pluripotency
## 481 WP1288 Wnt Signaling Pathway and Pluripotency
## 482 WP1288 Wnt Signaling Pathway and Pluripotency
## 483 WP1288 Wnt Signaling Pathway and Pluripotency
## 484 WP1288 Wnt Signaling Pathway and Pluripotency
## 485 WP1288 Wnt Signaling Pathway and Pluripotency
## 486 WP1288 Wnt Signaling Pathway and Pluripotency
## 487 WP1288 Wnt Signaling Pathway and Pluripotency
## 488 WP1288 Wnt Signaling Pathway and Pluripotency
## 489 WP1288 Wnt Signaling Pathway and Pluripotency
## 490 WP1288 Wnt Signaling Pathway and Pluripotency
## 491 WP1288 Wnt Signaling Pathway and Pluripotency
## 492 WP1288 Wnt Signaling Pathway and Pluripotency
## 493 WP1288 Wnt Signaling Pathway and Pluripotency
## 494 WP1288 Wnt Signaling Pathway and Pluripotency
## 495 WP1288 Wnt Signaling Pathway and Pluripotency
## 496 WP1288 Wnt Signaling Pathway and Pluripotency
## 497 WP1288 Wnt Signaling Pathway and Pluripotency
## 498 WP1288 Wnt Signaling Pathway and Pluripotency
## 499 WP1288 Wnt Signaling Pathway and Pluripotency
## 500 WP1288 Wnt Signaling Pathway and Pluripotency
## [ reached 'max' / getOption("max.print") -- omitted 6052 rows ]
DEG_pathways <- clusterProfiler::enricher(
entrezDEGs,
universe = entrez_IDs,
pAdjustMethod = "fdr",
pvalueCutoff = 0.05, #p.adjust cutoff
TERM2GENE = wpid2gene,
TERM2NAME = wpid2name)
DEG_pathways <- DOSE::setReadable(DEG_pathways, orgdb, keyType = "ENTREZID")
if (nrow(DEG_pathways@result %>% filter(p.adjust < 0.01 & qvalue < 0.05)) > 0) {
clusterProfiler::dotplot(DEG_pathways, showCategory = 20, font.size=9) +
theme(axis.text.y = element_text(angle = 0)) +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 30))
} else { print("No significantly enriched terms using criteria selected") }
## wrong orderBy parameter; set to default `orderBy = "x"`
## Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
This section uses clusterProfiler enricher() to detect enriched gene sets using WikiPathways data.
plotList <- list()
for (i in seq_along(levels(significantResults$contrast))) {
entrezDEGs <- dplyr::left_join(significantResults %>% filter(contrast==levels(significantResults$contrast)[i]),
id_table_entrez,
by=biomart_filter)
entrezDEGs <- entrezDEGs$entrezgene_id
if (length(entrezDEGs) < 10) {
plotList[[i]] <- NA
next
}
DEG_pathways <- clusterProfiler::enricher(entrezDEGs,
universe = entrez_IDs,
pAdjustMethod = "fdr",
pvalueCutoff = 0.05, #p.adjust cutoff
TERM2GENE = wpid2gene,
TERM2NAME = wpid2name)
DEG_pathways <- DOSE::setReadable(DEG_pathways, orgdb, keyType = "ENTREZID")
if (nrow(DEG_pathways@result %>% filter(p.adjust < 0.01 & qvalue < 0.05)) > 0) {
plotList[[i]] <- clusterProfiler::dotplot(DEG_pathways, showCategory = 20, font.size=9) +
theme(axis.text.y = element_text(angle = 0)) +
scale_y_discrete(labels = function(x) stringr::str_wrap(x, width = 30))
} else { plotList[[i]] <- NA }
}
## wrong orderBy parameter; set to default `orderBy = "x"`
## Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
## wrong orderBy parameter; set to default `orderBy = "x"`
## Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
for (i in seq_along(levels(significantResults$contrast))) {
cat("###", levels(significantResults$contrast)[i], " \n\n")
if(any(!is.na(plotList[[i]]))) {print(plotList[[i]])} else {print("No pathway perturbations were detected within this contrast.")}
cat(' \n\n')
}
[1] “No pathway perturbations were detected within this contrast.”
This section uses clusterProfiler gseKEGG() to detect enriched KEGG pathways and enrichplot gseaplot2 to plot running scores.
plotListGSEA <- list()
plotListGSEA_escore <- list()
for (i in seq_along(levels(significantResults$contrast))) {
DEGs_full <- dplyr::left_join(significantResults %>% filter(contrast==levels(significantResults$contrast)[i]),
id_table_entrez,
by=biomart_filter)
foldChanges <- DEGs_full %>% dplyr::pull(log2FoldChange)
names(foldChanges) <- DEGs_full %>% dplyr::pull(entrezgene_id)
foldChanges <- foldChanges %>% sort() %>% rev()
foldChanges <- foldChanges[!is.na(names(foldChanges))]
if (length(foldChanges) < 10) {
plotListGSEA[[i]] <- NA
plotListGSEA_escore[[i]] <- NA
next
}
kk <- gseKEGG(foldChanges, keyType="ncbi-geneid", organism=kegg_organism)
gsea_sorted <- kk %>% filter(p.adjust < 0.05) %>% arrange(desc(enrichmentScore))
if(dim(kk@result)[1] > 0){
plotListGSEA[[i]] <- ridgeplot(kk)
plotListGSEA_escore[[i]] <- enrichplot::gseaplot2(gsea_sorted, geneSetID = c(seq_along(gsea_sorted$ID)))
} else {
plotListGSEA[[i]] <- NA
plotListGSEA_escore[[i]] <- NA
}
}
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (27.27% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (28.33% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
for (i in seq_along(levels(significantResults$contrast))) {
cat("###", levels(significantResults$contrast)[i], " \n\n")
if(any(!is.na(plotListGSEA[[i]]))) {
print(plotListGSEA[[i]])
} else {
print("No pathway perturbations were detected within this contrast.")
}
cat(' \n\n')
}
## Picking joint bandwidth of 0.5
[1] “No pathway perturbations were detected within this contrast.”
## Picking joint bandwidth of 0.0526
In this section, only the pathway with the top enrichment score is shown (for simplicity). Note that it is possible to plot any pathway of interest by altering the call to gseaplot.
for (i in 1:length(levels(significantResults$contrast))) {
cat("###", levels(significantResults$contrast)[i], " \n\n")
if(any(!is.na(plotListGSEA_escore[[i]]))) {
print(plotListGSEA_escore[[i]])
} else {
print("No pathway perturbations were detected within this contrast.")
}
cat(' \n\n')
}
[1] “No pathway perturbations were detected within this contrast.”
Please use this as a starting point for the bioinformatics/statistics section of any publications based on the data analyzed in this report. All of the information provided here is contained elsewhere in the report, but it is aggregated here for your convenience.
There were 40 samples for which data was collected. A count matrix containing the \(80\) samples sequenced in this experiment was imported into R for statistical analysis. After removing samples with less than \(10^{6}\) reads, 76 samples were left. Following the exclusion of other samples (which ones? Why? Please explain for your experiment, if applicable.), there were 39 samples remaining. Following the recommendations set out by the Omics Data Analysis Frameworks for Regulatory application (R-ODAF) guidelines, genes were filtered to include only those where 75% of at least one experimental group were above 1 CPM, and spurious spikes were removed in which (max - median) of counts were less than (sum of counts)/(number of replicates + 1). The samples excluded from analysis are shown in the table below. We used DESeq2 1.30.0 to test for differentially abundant genes within the RNA-Seq data. The log2FoldChange shrinkage procedure used was ashr. An alpha of 0.1 was used to extract raw results, which are reported as the Wald test p-value. To account for multiple testing, fdr adjusted p-values are reported. Cook’s cutoff was set to FALSE in this analysis. Differentially expressed genes (DEGs) were filtered using a linear fold change cutoff of 1.5 and adjusted p-value of 0.05.
## [1] "2020-12-15 12:11:00 EST"
| unlist(params) | |
|---|---|
| run_pathway_analysis | TRUE |
| projectdir | /mnt/nfs1/mmeier/projects/Template_RNASeq |
| project_name | Flame_retardants |
| species | rat |
| design | group |
| intgroup1 | group |
| intgroup2 | dose |
| flag | TRUE |
| platform | RNA-Seq |
| group_facet | sex |
| exclude_samples | NA |
| exclude_groups | NA |
| include_only_column | NA |
| include_only_group | NA |
| use_cached_RData | FALSE |
| cpus | 39 |
| group_filter | female |
## Time difference of 2.733 mins
R session information.## ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.3 (2020-10-10)
## os Ubuntu 20.04.1 LTS
## system x86_64, linux-gnu
## ui RStudio
## language (EN)
## collate en_CA.UTF-8
## ctype en_CA.UTF-8
## tz America/Toronto
## date 2020-12-15
##
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
## package * version date lib source
## affy 1.68.0 2020-10-27 [1] Bioconductor
## affyio 1.60.0 2020-10-27 [1] Bioconductor
## annotate 1.68.0 2020-10-27 [1] Bioconductor
## AnnotationDbi * 1.52.0 2020-10-27 [1] Bioconductor
## AnnotationHub * 2.22.0 2020-10-27 [1] Bioconductor
## ashr 2.2-47 2020-02-20 [1] CRAN (R 4.0.2)
## askpass 1.1 2019-01-13 [1] CRAN (R 4.0.2)
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.2)
## backports 1.2.0 2020-11-02 [1] CRAN (R 4.0.3)
## Biobase * 2.50.0 2020-10-27 [1] Bioconductor
## BiocFileCache * 1.14.0 2020-10-27 [1] Bioconductor
## BiocGenerics * 0.36.0 2020-10-27 [1] Bioconductor
## BiocManager 1.30.10 2019-11-16 [1] CRAN (R 4.0.2)
## BiocParallel * 1.24.1 2020-11-06 [1] Bioconductor
## BiocVersion 3.12.0 2020-04-27 [1] Bioconductor
## biomaRt * 2.46.0 2020-10-27 [1] Bioconductor
## bit 4.0.4 2020-08-04 [1] CRAN (R 4.0.2)
## bit64 4.0.5 2020-08-30 [1] CRAN (R 4.0.2)
## bitops 1.0-6 2013-08-17 [1] CRAN (R 4.0.2)
## blob 1.2.1 2020-01-20 [1] CRAN (R 4.0.2)
## broom 0.7.2 2020-10-20 [1] CRAN (R 4.0.3)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.0.2)
## cli 2.2.0 2020-11-20 [1] CRAN (R 4.0.3)
## clusterProfiler * 3.18.0 2020-10-27 [1] Bioconductor
## colorspace 2.0-0 2020-11-11 [1] CRAN (R 4.0.3)
## cowplot 1.1.0 2020-09-08 [1] CRAN (R 4.0.2)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.2)
## crosstalk 1.1.0.1 2020-03-13 [1] CRAN (R 4.0.2)
## curl 4.3 2019-12-02 [1] CRAN (R 4.0.2)
## data.table * 1.13.2 2020-10-19 [1] CRAN (R 4.0.2)
## DBI 1.1.0 2019-12-15 [1] CRAN (R 4.0.2)
## dbplyr * 2.0.0 2020-11-03 [1] CRAN (R 4.0.3)
## DelayedArray 0.16.0 2020-10-27 [1] Bioconductor
## DESeq2 * 1.30.0 2020-10-27 [1] Bioconductor
## digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.3)
## DO.db 2.9 2020-10-02 [1] Bioconductor
## DOSE 3.16.0 2020-10-27 [1] Bioconductor
## downloader 0.4 2015-07-09 [1] CRAN (R 4.0.2)
## dplyr * 1.0.2 2020-08-18 [1] CRAN (R 4.0.2)
## DT * 0.16 2020-10-13 [1] CRAN (R 4.0.3)
## edgeR * 3.32.0 2020-10-27 [1] Bioconductor
## ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.2)
## enrichplot * 1.10.1 2020-11-14 [1] Bioconductor
## evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.2)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.2)
## farver 2.0.3 2020-01-16 [1] CRAN (R 4.0.2)
## fastmap 1.0.1 2019-10-08 [1] CRAN (R 4.0.2)
## fastmatch 1.1-0 2017-01-28 [1] CRAN (R 4.0.2)
## fgsea 1.16.0 2020-10-27 [1] Bioconductor
## forcats * 0.5.0 2020-03-01 [1] CRAN (R 4.0.2)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
## genefilter 1.72.0 2020-10-27 [1] Bioconductor
## geneplotter 1.68.0 2020-10-27 [1] Bioconductor
## generics 0.1.0 2020-10-31 [1] CRAN (R 4.0.3)
## GenomeInfoDb * 1.26.1 2020-11-20 [1] Bioconductor
## GenomeInfoDbData 1.2.4 2020-11-30 [1] Bioconductor
## GenomicRanges * 1.42.0 2020-10-27 [1] Bioconductor
## ggforce 0.3.2 2020-06-23 [1] CRAN (R 4.0.2)
## ggnewscale 0.4.4 2020-12-02 [1] CRAN (R 4.0.3)
## ggplot2 * 3.3.2 2020-06-19 [1] CRAN (R 4.0.2)
## ggraph 2.0.4 2020-11-16 [1] CRAN (R 4.0.3)
## ggrepel 0.8.2 2020-03-08 [1] CRAN (R 4.0.2)
## ggridges 0.5.2 2020-01-12 [1] CRAN (R 4.0.2)
## ggupset 0.3.0 2020-05-05 [1] CRAN (R 4.0.3)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
## GO.db 3.12.1 2020-11-30 [1] Bioconductor
## GOSemSim 2.16.1 2020-10-29 [1] Bioconductor
## graphlayouts 0.7.1 2020-10-26 [1] CRAN (R 4.0.3)
## gridExtra 2.3 2017-09-09 [1] CRAN (R 4.0.2)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.2)
## haven 2.3.1 2020-06-01 [1] CRAN (R 4.0.2)
## here 1.0.0 2020-11-15 [1] CRAN (R 4.0.2)
## highr 0.8 2019-03-20 [1] CRAN (R 4.0.2)
## hms 0.5.3 2020-01-08 [1] CRAN (R 4.0.2)
## htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.2)
## htmlwidgets 1.5.2 2020-10-03 [1] CRAN (R 4.0.2)
## httpuv 1.5.4 2020-06-06 [1] CRAN (R 4.0.2)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
## igraph 1.2.6 2020-10-06 [1] CRAN (R 4.0.2)
## interactiveDisplayBase 1.28.0 2020-10-27 [1] Bioconductor
## invgamma 1.1 2017-05-07 [1] CRAN (R 4.0.2)
## IRanges * 2.24.0 2020-10-27 [1] Bioconductor
## irlba 2.3.3 2019-02-05 [1] CRAN (R 4.0.2)
## janeaustenr 0.1.5 2017-06-10 [1] CRAN (R 4.0.2)
## jsonlite 1.7.1 2020-09-07 [1] CRAN (R 4.0.2)
## kableExtra * 1.3.1 2020-10-22 [1] CRAN (R 4.0.2)
## knitr * 1.30 2020-09-22 [1] CRAN (R 4.0.2)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.0.3)
## later 1.1.0.1 2020-06-05 [1] CRAN (R 4.0.2)
## lattice * 0.20-41 2020-04-02 [1] CRAN (R 4.0.2)
## lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.0.2)
## lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.2)
## limma * 3.46.0 2020-10-27 [1] Bioconductor
## locfit 1.5-9.4 2020-03-25 [1] CRAN (R 4.0.2)
## lubridate 1.7.9.2 2020-11-13 [1] CRAN (R 4.0.3)
## magrittr * 2.0.1 2020-11-17 [1] CRAN (R 4.0.3)
## MASS 7.3-53 2020-09-09 [4] CRAN (R 4.0.2)
## Matrix 1.2-18 2019-11-27 [4] CRAN (R 4.0.0)
## MatrixGenerics * 1.2.0 2020-10-27 [1] Bioconductor
## matrixStats * 0.57.0 2020-09-25 [1] CRAN (R 4.0.2)
## memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.2)
## mime 0.9 2020-02-04 [1] CRAN (R 4.0.2)
## mixsqp 0.3-43 2020-05-14 [1] CRAN (R 4.0.2)
## modelr 0.1.8 2020-05-19 [1] CRAN (R 4.0.2)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.2)
## openssl 1.4.3 2020-09-18 [1] CRAN (R 4.0.2)
## openxlsx * 4.2.3 2020-10-27 [1] CRAN (R 4.0.2)
## org.Rn.eg.db * 3.12.0 2020-12-01 [1] Bioconductor
## pheatmap * 1.0.12 2019-01-04 [1] CRAN (R 4.0.2)
## pillar 1.4.7 2020-11-20 [1] CRAN (R 4.0.3)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.2)
## plotly * 4.9.2.1 2020-04-04 [1] CRAN (R 4.0.2)
## plyr 1.8.6 2020-03-03 [1] CRAN (R 4.0.2)
## polyclip 1.10-0 2019-03-14 [1] CRAN (R 4.0.2)
## preprocessCore 1.52.0 2020-10-27 [1] Bioconductor
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.2)
## progress 1.2.2 2019-05-16 [1] CRAN (R 4.0.2)
## promises 1.1.1 2020-06-09 [1] CRAN (R 4.0.2)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.0.2)
## qvalue 2.22.0 2020-10-27 [1] Bioconductor
## R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.3)
## rappdirs 0.3.1 2016-03-28 [1] CRAN (R 4.0.2)
## RColorBrewer * 1.1-2 2014-12-07 [1] CRAN (R 4.0.2)
## Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.2)
## RCurl 1.98-1.2 2020-04-18 [1] CRAN (R 4.0.2)
## readr * 1.4.0 2020-10-05 [1] CRAN (R 4.0.2)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 4.0.2)
## reprex 0.3.0 2019-05-16 [1] CRAN (R 4.0.2)
## reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.0.2)
## rjson 0.2.20 2018-06-08 [1] CRAN (R 4.0.2)
## rlang 0.4.9 2020-11-26 [1] CRAN (R 4.0.3)
## rmarkdown 2.5 2020-10-21 [1] CRAN (R 4.0.3)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.2)
## RSQLite 2.2.1 2020-09-30 [1] CRAN (R 4.0.2)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.3)
## rvcheck 0.1.8 2020-03-01 [1] CRAN (R 4.0.2)
## rvest 0.3.6 2020-07-25 [1] CRAN (R 4.0.2)
## rWikiPathways * 1.10.0 2020-10-27 [1] Bioconductor
## S4Vectors * 0.28.0 2020-10-27 [1] Bioconductor
## scales 1.1.1 2020-05-11 [1] CRAN (R 4.0.2)
## scatterpie 0.1.5 2020-09-09 [1] CRAN (R 4.0.2)
## sessioninfo * 1.1.1 2018-11-05 [1] CRAN (R 4.0.2)
## shadowtext 0.0.7 2019-11-06 [1] CRAN (R 4.0.3)
## shiny 1.5.0 2020-06-23 [1] CRAN (R 4.0.2)
## SnowballC 0.7.0 2020-04-01 [1] CRAN (R 4.0.2)
## SQUAREM 2020.5 2020-10-21 [1] CRAN (R 4.0.2)
## stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
## SummarizedExperiment * 1.20.0 2020-10-27 [1] Bioconductor
## survival 3.2-7 2020-09-28 [4] CRAN (R 4.0.2)
## tibble * 3.0.4 2020-10-12 [1] CRAN (R 4.0.3)
## tidygraph 1.2.0 2020-05-12 [1] CRAN (R 4.0.2)
## tidyr * 1.1.2 2020-08-27 [1] CRAN (R 4.0.2)
## tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.2)
## tidytext * 0.2.6 2020-09-20 [1] CRAN (R 4.0.2)
## tidyverse * 1.3.0 2019-11-21 [1] CRAN (R 4.0.2)
## tinytex 0.27 2020-11-01 [1] CRAN (R 4.0.3)
## tokenizers 0.2.1 2018-03-29 [1] CRAN (R 4.0.2)
## truncnorm 1.0-8 2018-02-27 [1] CRAN (R 4.0.2)
## tweenr 1.0.1 2018-12-14 [1] CRAN (R 4.0.2)
## vctrs 0.3.5 2020-11-17 [1] CRAN (R 4.0.3)
## viridis * 0.5.1 2018-03-29 [1] CRAN (R 4.0.2)
## viridisLite * 0.3.0 2018-02-01 [1] CRAN (R 4.0.2)
## vsn * 3.58.0 2020-10-27 [1] Bioconductor
## webshot 0.5.2 2019-11-22 [1] CRAN (R 4.0.2)
## withr 2.3.0 2020-09-22 [1] CRAN (R 4.0.2)
## xfun 0.19 2020-10-30 [1] CRAN (R 4.0.3)
## XML 3.99-0.5 2020-07-23 [1] CRAN (R 4.0.2)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 4.0.2)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.0.2)
## XVector 0.30.0 2020-10-27 [1] Bioconductor
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
## zip 2.1.1 2020-08-27 [1] CRAN (R 4.0.2)
## zlibbioc 1.36.0 2020-10-27 [1] Bioconductor
##
## [1] /home/mmeier/R/x86_64-pc-linux-gnu-library/4.0
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library
# Save complete workspace
if(is.na(params$group_facet)) {
save.image(file = file.path(paths$RData, "Complete_analysis.RData"))
} else {
save.image(file = file.path(paths$RData, paste0("Complete_analysis_", params$group_filter, ".RData")))
}
# Add a flag to the start of the script with if statements in all code chunks to check if the flag is set.
# If flag is TRUE, run analysis; if flag is FALSE, SKIP analysis and continue here (just to modify plotting output).